From f346f96d29dab1575532b02a78b5a4a659dce9ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=84=8D=F0=9D=95=A0=F0=9D=95=9D=F0=9D=95=9D=F0=9D=95=A0?=
 =?UTF-8?q?=F0=9D=95=A8=20=F0=9D=95=84=F0=9D=95=92=F0=9D=95=9F?=
 <hollowman@opensuse.org>
Date: Sun, 7 Sep 2025 04:57:24 +0300
Subject: [PATCH] [training_utils] fix: stop using `math` naming under reward
 score (#3378)

---
 examples/data_preprocess/math_dataset.py            | 2 +-
 examples/split_placement/main_ppo_split.py          | 4 ++--
 recipe/char_count/create_dataset.py                 | 2 +-
 recipe/char_count/reward_function.py                | 6 +++---
 recipe/genrm_remote/reward_function.py              | 2 +-
 recipe/r1/reward_score.py                           | 4 ++--
 recipe/r1/tasks/{math.py => math_reward.py}         | 0
 verl/utils/reward_score/__init__.py                 | 4 ++--
 verl/utils/reward_score/math_batch.py               | 2 +-
 verl/utils/reward_score/{math.py => math_reward.py} | 0
 10 files changed, 13 insertions(+), 13 deletions(-)
 rename recipe/r1/tasks/{math.py => math_reward.py} (100%)
 rename verl/utils/reward_score/{math.py => math_reward.py} (100%)

diff --git a/examples/data_preprocess/math_dataset.py b/examples/data_preprocess/math_dataset.py
index 3418a5d6c..343a83436 100644
--- a/examples/data_preprocess/math_dataset.py
+++ b/examples/data_preprocess/math_dataset.py
@@ -22,7 +22,7 @@ import os
 import datasets
 
 from verl.utils.hdfs_io import copy, makedirs
-from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 
 
 def extract_solution(solution_str):
diff --git a/examples/split_placement/main_ppo_split.py b/examples/split_placement/main_ppo_split.py
index d8d880ade..3e610abd2 100644
--- a/examples/split_placement/main_ppo_split.py
+++ b/examples/split_placement/main_ppo_split.py
@@ -23,14 +23,14 @@ from split_monkey_patch import fit
 
 from verl import DataProto
 from verl.trainer.ppo.ray_trainer import RayPPOTrainer
-from verl.utils.reward_score import gsm8k, math
+from verl.utils.reward_score import gsm8k, math_reward
 
 
 def _select_rm_score_fn(data_source):
     if data_source == "openai/gsm8k":
         return gsm8k.compute_score
     elif data_source == "lighteval/MATH":
-        return math.compute_score
+        return math_reward.compute_score
     else:
         raise NotImplementedError
 
diff --git a/recipe/char_count/create_dataset.py b/recipe/char_count/create_dataset.py
index 47571e023..985b1f03b 100644
--- a/recipe/char_count/create_dataset.py
+++ b/recipe/char_count/create_dataset.py
@@ -142,7 +142,7 @@ if __name__ == "__main__":
 
     rl_test_dataset = {"prompt": [], "data_source": [], "ability": [], "reward_model": [], "extra_info": []}
 
-    from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+    from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 
     for o in train_outputs:
         prompt = o[0]
diff --git a/recipe/char_count/reward_function.py b/recipe/char_count/reward_function.py
index 9bdffe2a5..7c87ea49a 100644
--- a/recipe/char_count/reward_function.py
+++ b/recipe/char_count/reward_function.py
@@ -16,15 +16,15 @@
 Reward function
 """
 
-from verl.utils.reward_score import math
+from verl.utils.reward_score import math_reward
 
 
 def char_count_reward_function(data_source, solution_str, ground_truth, extra_info=None):
     try:
-        last_boxed_string = math.last_boxed_only_string(solution_str)
+        last_boxed_string = math_reward.last_boxed_only_string(solution_str)
         if last_boxed_string is None:
             return 0
-        solution = math.remove_boxed(last_boxed_string)
+        solution = math_reward.remove_boxed(last_boxed_string)
         if solution == ground_truth:
             return 1
         else:
diff --git a/recipe/genrm_remote/reward_function.py b/recipe/genrm_remote/reward_function.py
index b2d3fbc2f..35b3af399 100644
--- a/recipe/genrm_remote/reward_function.py
+++ b/recipe/genrm_remote/reward_function.py
@@ -17,7 +17,7 @@ from time import sleep
 
 import requests
 
-from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 
 BASE_URL = "http://localhost:30000"
 API_KEY = "EMPTY"
diff --git a/recipe/r1/reward_score.py b/recipe/r1/reward_score.py
index 2010665aa..9aeced911 100644
--- a/recipe/r1/reward_score.py
+++ b/recipe/r1/reward_score.py
@@ -15,9 +15,9 @@
 
 def reward_func(data_source, solution_str, ground_truth, extra_info=None):
     if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]:
-        from recipe.r1.tasks import math
+        from recipe.r1.tasks import math_reward
 
-        return math.compute_score(solution_str, ground_truth)
+        return math_reward.compute_score(solution_str, ground_truth)
     elif data_source == "Idavidrein/gpqa":
         from recipe.r1.tasks import gpqa
 
diff --git a/recipe/r1/tasks/math.py b/recipe/r1/tasks/math_reward.py
similarity index 100%
rename from recipe/r1/tasks/math.py
rename to recipe/r1/tasks/math_reward.py
diff --git a/verl/utils/reward_score/__init__.py b/verl/utils/reward_score/__init__.py
index 5151c5151..44b68aba7 100644
--- a/verl/utils/reward_score/__init__.py
+++ b/verl/utils/reward_score/__init__.py
@@ -45,9 +45,9 @@ def default_compute_score(
 
         res = gsm8k.compute_score(solution_str, ground_truth)
     elif data_source in ["lighteval/MATH", "DigitalLearningGmbH/MATH-lighteval", "HuggingFaceH4/MATH-500"]:
-        from . import math
+        from . import math_reward
 
-        res = math.compute_score(solution_str, ground_truth)
+        res = math_reward.compute_score(solution_str, ground_truth)
         # [Optional] Math-Verify Integration
         # For enhanced accuracy, consider utilizing Math-Verify (https://github.com/huggingface/Math-Verify).
         # Note: Math-Verify needs to be manually installed via pip: `pip install math-verify`.
diff --git a/verl/utils/reward_score/math_batch.py b/verl/utils/reward_score/math_batch.py
index ed080860a..20b38e1bb 100644
--- a/verl/utils/reward_score/math_batch.py
+++ b/verl/utils/reward_score/math_batch.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .math import compute_score
+from .math_reward import compute_score
 
 
 def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
diff --git a/verl/utils/reward_score/math.py b/verl/utils/reward_score/math_reward.py
similarity index 100%
rename from verl/utils/reward_score/math.py
rename to verl/utils/reward_score/math_reward.py