[training_utils] fix: stop using math naming under reward score (#3378)

Author: ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟
Date: 2025-09-07 04:57:24 +03:00
Committed by: GitHub
Parent: cb01f10ba0
Commit: f346f96d29
10 changed files with 13 additions and 13 deletions

@@ -22,7 +22,7 @@ import os
 import datasets
 from verl.utils.hdfs_io import copy, makedirs
-from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 def extract_solution(solution_str):
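
The body of `extract_solution` is cut off by the hunk; a minimal sketch of how it can be written on top of the renamed helpers (an illustration, not necessarily the exact code in this script):

# Sketch: pull the final \boxed{...} answer out of a solution string using
# the helpers imported above from verl.utils.reward_score.math_reward.
from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed

def extract_solution(solution_str):
    # last_boxed_only_string returns the last "\boxed{...}" span,
    # remove_boxed strips the surrounding \boxed{} wrapper.
    return remove_boxed(last_boxed_only_string(solution_str))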

@@ -23,14 +23,14 @@ from split_monkey_patch import fit
 from verl import DataProto
 from verl.trainer.ppo.ray_trainer import RayPPOTrainer
-from verl.utils.reward_score import gsm8k, math
+from verl.utils.reward_score import gsm8k, math_reward
 def _select_rm_score_fn(data_source):
     if data_source == "openai/gsm8k":
         return gsm8k.compute_score
     elif data_source == "lighteval/MATH":
-        return math.compute_score
+        return math_reward.compute_score
     else:
         raise NotImplementedError
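
A quick usage sketch of the dispatcher above (the sample strings are hypothetical; the two-argument compute_score call matches its usage elsewhere in this commit):

# Pick the rule-based scorer for the dataset, then score one rollout.
solution_str = r"... so the final answer is \boxed{42}."
ground_truth = "42"
compute_score_fn = _select_rm_score_fn("lighteval/MATH")  # -> math_reward.compute_score
score = compute_score_fn(solution_str, ground_truth)      # expected 1.0 for a matching boxed answer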

@@ -142,7 +142,7 @@ if __name__ == "__main__":
     rl_test_dataset = {"prompt": [], "data_source": [], "ability": [], "reward_model": [], "extra_info": []}
-    from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+    from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
     for o in train_outputs:
         prompt = o[0]
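
The loop body is truncated here. A hypothetical continuation, assuming each output `o` is a (prompt, model response) pair and that the rows follow verl's usual reward_model/ground_truth layout (the field values below are illustrative, not taken from the diff):

# Hypothetical loop body: keep only responses with a parsable \boxed{...} answer.
for o in train_outputs:
    prompt = o[0]
    response = o[1]
    boxed = last_boxed_only_string(response)
    if boxed is None:
        continue  # no boxed answer to use as a rule-based ground truth
    rl_test_dataset["prompt"].append(prompt)
    rl_test_dataset["data_source"].append("lighteval/MATH")  # illustrative value
    rl_test_dataset["ability"].append("math")
    rl_test_dataset["reward_model"].append({"style": "rule", "ground_truth": remove_boxed(boxed)})
    rl_test_dataset["extra_info"].append({})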

@@ -16,15 +16,15 @@
 Reward function
 """
-from verl.utils.reward_score import math
+from verl.utils.reward_score import math_reward
 def char_count_reward_function(data_source, solution_str, ground_truth, extra_info=None):
     try:
-        last_boxed_string = math.last_boxed_only_string(solution_str)
+        last_boxed_string = math_reward.last_boxed_only_string(solution_str)
         if last_boxed_string is None:
             return 0
-        solution = math.remove_boxed(last_boxed_string)
+        solution = math_reward.remove_boxed(last_boxed_string)
         if solution == ground_truth:
             return 1
         else:
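
The hunk stops at the final else branch (presumably the character-count fallback that gives the function its name). A usage sketch of the path that is shown:

# A correct boxed answer takes the early-return path and scores 1.
score = char_count_reward_function(
    data_source="lighteval/MATH",              # illustrative data source
    solution_str=r"The answer is \boxed{42}.",
    ground_truth="42",
)  # -> 1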

@@ -17,7 +17,7 @@ from time import sleep
 import requests
-from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 BASE_URL = "http://localhost:30000"
 API_KEY = "EMPTY"
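
The request code itself is outside this hunk. A sketch of how such a script typically talks to the local endpoint, assuming BASE_URL serves an OpenAI-compatible /v1/chat/completions API (the endpoint, payload, and helper below are assumptions, not taken from the diff):

# Assumed OpenAI-compatible chat endpoint; the renamed helpers then extract
# the final boxed answer from the generated text for scoring.
def generate_boxed_answer(prompt: str) -> str:
    resp = requests.post(
        f"{BASE_URL}/v1/chat/completions",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={"model": "default", "messages": [{"role": "user", "content": prompt}]},
        timeout=600,
    )
    resp.raise_for_status()
    text = resp.json()["choices"][0]["message"]["content"]
    boxed = last_boxed_only_string(text)
    return remove_boxed(boxed) if boxed is not None else ""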

@@ -15,9 +15,9 @@
 def reward_func(data_source, solution_str, ground_truth, extra_info=None):
     if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]:
-        from recipe.r1.tasks import math
+        from recipe.r1.tasks import math_reward
-        return math.compute_score(solution_str, ground_truth)
+        return math_reward.compute_score(solution_str, ground_truth)
     elif data_source == "Idavidrein/gpqa":
         from recipe.r1.tasks import gpqa
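
A usage sketch for the math branch shown above (the strings are hypothetical):

# Dispatch on data_source; AIME-style sources go through math_reward.compute_score.
score = reward_func(
    data_source="Maxwell-Jia/AIME_2024",
    solution_str=r"... hence the answer is \boxed{204}.",
    ground_truth="204",
)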

@@ -45,9 +45,9 @@ def default_compute_score(
         res = gsm8k.compute_score(solution_str, ground_truth)
     elif data_source in ["lighteval/MATH", "DigitalLearningGmbH/MATH-lighteval", "HuggingFaceH4/MATH-500"]:
-        from . import math
+        from . import math_reward
-        res = math.compute_score(solution_str, ground_truth)
+        res = math_reward.compute_score(solution_str, ground_truth)
         # [Optional] Math-Verify Integration
         # For enhanced accuracy, consider utilizing Math-Verify (https://github.com/huggingface/Math-Verify).
         # Note: Math-Verify needs to be manually installed via pip: `pip install math-verify`.
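
The comment above points to the optional Math-Verify path. A minimal sketch of such a scorer built directly on the upstream math_verify package (an illustration of the idea, not the integration verl ships):

# Requires `pip install math-verify`. parse() converts strings into comparable
# expressions; verify() checks whether the parsed answer matches the gold target.
from math_verify import parse, verify

def math_verify_score(solution_str: str, ground_truth: str) -> float:
    try:
        return 1.0 if verify(parse(ground_truth), parse(solution_str)) else 0.0
    except Exception:
        return 0.0  # unparsable inputs score zero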

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .math import compute_score
+from .math_reward import compute_score
 def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
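
The body of compute_score_batched is not part of the hunk; one plausible implementation simply maps the renamed compute_score over the batch (a sketch, not the file's actual code):

# Score each (solution, ground_truth) pair with the single-sample scorer.
def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
    return [
        compute_score(solution_str, ground_truth)
        for solution_str, ground_truth in zip(solution_strs, ground_truths)
    ]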