From f346f96d29dab1575532b02a78b5a4a659dce9ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=84=8D=F0=9D=95=A0=F0=9D=95=9D=F0=9D=95=9D=F0=9D=95=A0?= =?UTF-8?q?=F0=9D=95=A8=20=F0=9D=95=84=F0=9D=95=92=F0=9D=95=9F?= Date: Sun, 7 Sep 2025 04:57:24 +0300 Subject: [PATCH] [training_utils] fix: stop using `math` naming under reward score (#3378) --- examples/data_preprocess/math_dataset.py | 2 +- examples/split_placement/main_ppo_split.py | 4 ++-- recipe/char_count/create_dataset.py | 2 +- recipe/char_count/reward_function.py | 6 +++--- recipe/genrm_remote/reward_function.py | 2 +- recipe/r1/reward_score.py | 4 ++-- recipe/r1/tasks/{math.py => math_reward.py} | 0 verl/utils/reward_score/__init__.py | 4 ++-- verl/utils/reward_score/math_batch.py | 2 +- verl/utils/reward_score/{math.py => math_reward.py} | 0 10 files changed, 13 insertions(+), 13 deletions(-) rename recipe/r1/tasks/{math.py => math_reward.py} (100%) rename verl/utils/reward_score/{math.py => math_reward.py} (100%) diff --git a/examples/data_preprocess/math_dataset.py b/examples/data_preprocess/math_dataset.py index 3418a5d6c..343a83436 100644 --- a/examples/data_preprocess/math_dataset.py +++ b/examples/data_preprocess/math_dataset.py @@ -22,7 +22,7 @@ import os import datasets from verl.utils.hdfs_io import copy, makedirs -from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed +from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed def extract_solution(solution_str): diff --git a/examples/split_placement/main_ppo_split.py b/examples/split_placement/main_ppo_split.py index d8d880ade..3e610abd2 100644 --- a/examples/split_placement/main_ppo_split.py +++ b/examples/split_placement/main_ppo_split.py @@ -23,14 +23,14 @@ from split_monkey_patch import fit from verl import DataProto from verl.trainer.ppo.ray_trainer import RayPPOTrainer -from verl.utils.reward_score import gsm8k, math +from verl.utils.reward_score import gsm8k, math_reward def 
_select_rm_score_fn(data_source): if data_source == "openai/gsm8k": return gsm8k.compute_score elif data_source == "lighteval/MATH": - return math.compute_score + return math_reward.compute_score else: raise NotImplementedError diff --git a/recipe/char_count/create_dataset.py b/recipe/char_count/create_dataset.py index 47571e023..985b1f03b 100644 --- a/recipe/char_count/create_dataset.py +++ b/recipe/char_count/create_dataset.py @@ -142,7 +142,7 @@ if __name__ == "__main__": rl_test_dataset = {"prompt": [], "data_source": [], "ability": [], "reward_model": [], "extra_info": []} - from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed + from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed for o in train_outputs: prompt = o[0] diff --git a/recipe/char_count/reward_function.py b/recipe/char_count/reward_function.py index 9bdffe2a5..7c87ea49a 100644 --- a/recipe/char_count/reward_function.py +++ b/recipe/char_count/reward_function.py @@ -16,15 +16,15 @@ Reward function """ -from verl.utils.reward_score import math +from verl.utils.reward_score import math_reward def char_count_reward_function(data_source, solution_str, ground_truth, extra_info=None): try: - last_boxed_string = math.last_boxed_only_string(solution_str) + last_boxed_string = math_reward.last_boxed_only_string(solution_str) if last_boxed_string is None: return 0 - solution = math.remove_boxed(last_boxed_string) + solution = math_reward.remove_boxed(last_boxed_string) if solution == ground_truth: return 1 else: diff --git a/recipe/genrm_remote/reward_function.py b/recipe/genrm_remote/reward_function.py index b2d3fbc2f..35b3af399 100644 --- a/recipe/genrm_remote/reward_function.py +++ b/recipe/genrm_remote/reward_function.py @@ -17,7 +17,7 @@ from time import sleep import requests -from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed +from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed BASE_URL = 
"http://localhost:30000" API_KEY = "EMPTY" diff --git a/recipe/r1/reward_score.py b/recipe/r1/reward_score.py index 2010665aa..9aeced911 100644 --- a/recipe/r1/reward_score.py +++ b/recipe/r1/reward_score.py @@ -15,9 +15,9 @@ def reward_func(data_source, solution_str, ground_truth, extra_info=None): if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]: - from recipe.r1.tasks import math + from recipe.r1.tasks import math_reward - return math.compute_score(solution_str, ground_truth) + return math_reward.compute_score(solution_str, ground_truth) elif data_source == "Idavidrein/gpqa": from recipe.r1.tasks import gpqa diff --git a/recipe/r1/tasks/math.py b/recipe/r1/tasks/math_reward.py similarity index 100% rename from recipe/r1/tasks/math.py rename to recipe/r1/tasks/math_reward.py diff --git a/verl/utils/reward_score/__init__.py b/verl/utils/reward_score/__init__.py index 5151c5151..44b68aba7 100644 --- a/verl/utils/reward_score/__init__.py +++ b/verl/utils/reward_score/__init__.py @@ -45,9 +45,9 @@ def default_compute_score( res = gsm8k.compute_score(solution_str, ground_truth) elif data_source in ["lighteval/MATH", "DigitalLearningGmbH/MATH-lighteval", "HuggingFaceH4/MATH-500"]: - from . import math + from . import math_reward - res = math.compute_score(solution_str, ground_truth) + res = math_reward.compute_score(solution_str, ground_truth) # [Optional] Math-Verify Integration # For enhanced accuracy, consider utilizing Math-Verify (https://github.com/huggingface/Math-Verify). # Note: Math-Verify needs to be manually installed via pip: `pip install math-verify`. diff --git a/verl/utils/reward_score/math_batch.py b/verl/utils/reward_score/math_batch.py index ed080860a..20b38e1bb 100644 --- a/verl/utils/reward_score/math_batch.py +++ b/verl/utils/reward_score/math_batch.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .math import compute_score +from .math_reward import compute_score def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos): diff --git a/verl/utils/reward_score/math.py b/verl/utils/reward_score/math_reward.py similarity index 100% rename from verl/utils/reward_score/math.py rename to verl/utils/reward_score/math_reward.py