[training_utils] fix: stop using math naming under reward score (#3378)

Author: ℍ𝕠𝕝𝕝𝕠𝕨 𝕄𝕒𝕟
Date: 2025-09-07 04:57:24 +03:00
Committed by: GitHub
Parent: cb01f10ba0
Commit: f346f96d29
10 changed files with 13 additions and 13 deletions

@@ -22,7 +22,7 @@ import os
 import datasets
 from verl.utils.hdfs_io import copy, makedirs
-from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 def extract_solution(solution_str):
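
The body of `extract_solution` is cut off by the hunk; a minimal sketch of how it can be written on top of the renamed helpers (an illustration, not necessarily the exact code in this script):

# Sketch: pull the final \boxed{...} answer out of a solution string using
# the helpers imported above from verl.utils.reward_score.math_reward.
from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed

def extract_solution(solution_str):
    # last_boxed_only_string returns the last "\boxed{...}" span,
    # remove_boxed strips the surrounding \boxed{} wrapper.
    return remove_boxed(last_boxed_only_string(solution_str))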

@@ -23,14 +23,14 @@ from split_monkey_patch import fit
 from verl import DataProto
 from verl.trainer.ppo.ray_trainer import RayPPOTrainer
-from verl.utils.reward_score import gsm8k, math
+from verl.utils.reward_score import gsm8k, math_reward
 def _select_rm_score_fn(data_source):
     if data_source == "openai/gsm8k":
         return gsm8k.compute_score
     elif data_source == "lighteval/MATH":
-        return math.compute_score
+        return math_reward.compute_score
     else:
         raise NotImplementedError
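
A quick usage sketch of the dispatcher above (the sample strings are hypothetical; the two-argument compute_score call matches its usage elsewhere in this commit):

# Pick the rule-based scorer for the dataset, then score one rollout.
solution_str = r"... so the final answer is \boxed{42}."
ground_truth = "42"
compute_score_fn = _select_rm_score_fn("lighteval/MATH")  # -> math_reward.compute_score
score = compute_score_fn(solution_str, ground_truth)      # expected 1.0 for a matching boxed answer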

@@ -142,7 +142,7 @@ if __name__ == "__main__":
     rl_test_dataset = {"prompt": [], "data_source": [], "ability": [], "reward_model": [], "extra_info": []}
-    from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+    from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
     for o in train_outputs:
         prompt = o[0]
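
The loop body is truncated here. A hypothetical continuation, assuming each output `o` is a (prompt, model response) pair and that the rows follow verl's usual reward_model/ground_truth layout (the field values below are illustrative, not taken from the diff):

# Hypothetical loop body: keep only responses with a parsable \boxed{...} answer.
for o in train_outputs:
    prompt = o[0]
    response = o[1]
    boxed = last_boxed_only_string(response)
    if boxed is None:
        continue  # no boxed answer to use as a rule-based ground truth
    rl_test_dataset["prompt"].append(prompt)
    rl_test_dataset["data_source"].append("lighteval/MATH")  # illustrative value
    rl_test_dataset["ability"].append("math")
    rl_test_dataset["reward_model"].append({"style": "rule", "ground_truth": remove_boxed(boxed)})
    rl_test_dataset["extra_info"].append({})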

@@ -16,15 +16,15 @@
 Reward function
 """
-from verl.utils.reward_score import math
+from verl.utils.reward_score import math_reward
 def char_count_reward_function(data_source, solution_str, ground_truth, extra_info=None):
     try:
-        last_boxed_string = math.last_boxed_only_string(solution_str)
+        last_boxed_string = math_reward.last_boxed_only_string(solution_str)
         if last_boxed_string is None:
             return 0
-        solution = math.remove_boxed(last_boxed_string)
+        solution = math_reward.remove_boxed(last_boxed_string)
         if solution == ground_truth:
             return 1
         else:
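
The hunk stops at the final else branch (presumably the character-count fallback that gives the function its name). A usage sketch of the path that is shown:

# A correct boxed answer takes the early-return path and scores 1.
score = char_count_reward_function(
    data_source="lighteval/MATH",              # illustrative data source
    solution_str=r"The answer is \boxed{42}.",
    ground_truth="42",
)  # -> 1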

@@ -17,7 +17,7 @@ from time import sleep
 import requests
-from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
+from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
 BASE_URL = "http://localhost:30000"
 API_KEY = "EMPTY"
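
The request code itself is outside this hunk. A sketch of how such a script typically talks to the local endpoint, assuming BASE_URL serves an OpenAI-compatible /v1/chat/completions API (the endpoint, payload, and helper below are assumptions, not taken from the diff):

# Assumed OpenAI-compatible chat endpoint; the renamed helpers then extract
# the final boxed answer from the generated text for scoring.
def generate_boxed_answer(prompt: str) -> str:
    resp = requests.post(
        f"{BASE_URL}/v1/chat/completions",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={"model": "default", "messages": [{"role": "user", "content": prompt}]},
        timeout=600,
    )
    resp.raise_for_status()
    text = resp.json()["choices"][0]["message"]["content"]
    boxed = last_boxed_only_string(text)
    return remove_boxed(boxed) if boxed is not None else ""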

@@ -15,9 +15,9 @@
 def reward_func(data_source, solution_str, ground_truth, extra_info=None):
     if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]:
-        from recipe.r1.tasks import math
+        from recipe.r1.tasks import math_reward
-        return math.compute_score(solution_str, ground_truth)
+        return math_reward.compute_score(solution_str, ground_truth)
     elif data_source == "Idavidrein/gpqa":
         from recipe.r1.tasks import gpqa
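
A usage sketch for the math branch shown above (the strings are hypothetical):

# Dispatch on data_source; AIME-style sources go through math_reward.compute_score.
score = reward_func(
    data_source="Maxwell-Jia/AIME_2024",
    solution_str=r"... hence the answer is \boxed{204}.",
    ground_truth="204",
)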

@@ -45,9 +45,9 @@ def default_compute_score(
         res = gsm8k.compute_score(solution_str, ground_truth)
     elif data_source in ["lighteval/MATH", "DigitalLearningGmbH/MATH-lighteval", "HuggingFaceH4/MATH-500"]:
-        from . import math
+        from . import math_reward
-        res = math.compute_score(solution_str, ground_truth)
+        res = math_reward.compute_score(solution_str, ground_truth)
         # [Optional] Math-Verify Integration
         # For enhanced accuracy, consider utilizing Math-Verify (https://github.com/huggingface/Math-Verify).
         # Note: Math-Verify needs to be manually installed via pip: `pip install math-verify`.
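
The comment above points to the optional Math-Verify path. A minimal sketch of such a scorer built directly on the upstream math_verify package (an illustration of the idea, not the integration verl ships):

# Requires `pip install math-verify`. parse() converts strings into comparable
# expressions; verify() checks whether the parsed answer matches the gold target.
from math_verify import parse, verify

def math_verify_score(solution_str: str, ground_truth: str) -> float:
    try:
        return 1.0 if verify(parse(ground_truth), parse(solution_str)) else 0.0
    except Exception:
        return 0.0  # unparsable inputs score zero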

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .math import compute_score
+from .math_reward import compute_score
 def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
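
The body of compute_score_batched is not part of the hunk; one plausible implementation simply maps the renamed compute_score over the batch (a sketch, not the file's actual code):

# Score each (solution, ground_truth) pair with the single-sample scorer.
def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
    return [
        compute_score(solution_str, ground_truth)
        for solution_str, ground_truth in zip(solution_strs, ground_truths)
    ]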