mirror of
https://github.com/volcengine/verl.git
synced 2025-10-20 13:43:50 +08:00
[training_utils] fix: stop using math
naming under reward score (#3378)
This commit is contained in:
@ -22,7 +22,7 @@ import os
|
||||
import datasets
|
||||
|
||||
from verl.utils.hdfs_io import copy, makedirs
|
||||
from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
|
||||
from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
|
||||
|
||||
|
||||
def extract_solution(solution_str):
|
||||
|
@ -23,14 +23,14 @@ from split_monkey_patch import fit
|
||||
|
||||
from verl import DataProto
|
||||
from verl.trainer.ppo.ray_trainer import RayPPOTrainer
|
||||
from verl.utils.reward_score import gsm8k, math
|
||||
from verl.utils.reward_score import gsm8k, math_reward
|
||||
|
||||
|
||||
def _select_rm_score_fn(data_source):
|
||||
if data_source == "openai/gsm8k":
|
||||
return gsm8k.compute_score
|
||||
elif data_source == "lighteval/MATH":
|
||||
return math.compute_score
|
||||
return math_reward.compute_score
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
|
@ -142,7 +142,7 @@ if __name__ == "__main__":
|
||||
|
||||
rl_test_dataset = {"prompt": [], "data_source": [], "ability": [], "reward_model": [], "extra_info": []}
|
||||
|
||||
from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
|
||||
from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
|
||||
|
||||
for o in train_outputs:
|
||||
prompt = o[0]
|
||||
|
@ -16,15 +16,15 @@
|
||||
Reward function
|
||||
"""
|
||||
|
||||
from verl.utils.reward_score import math
|
||||
from verl.utils.reward_score import math_reward
|
||||
|
||||
|
||||
def char_count_reward_function(data_source, solution_str, ground_truth, extra_info=None):
|
||||
try:
|
||||
last_boxed_string = math.last_boxed_only_string(solution_str)
|
||||
last_boxed_string = math_reward.last_boxed_only_string(solution_str)
|
||||
if last_boxed_string is None:
|
||||
return 0
|
||||
solution = math.remove_boxed(last_boxed_string)
|
||||
solution = math_reward.remove_boxed(last_boxed_string)
|
||||
if solution == ground_truth:
|
||||
return 1
|
||||
else:
|
||||
|
@ -17,7 +17,7 @@ from time import sleep
|
||||
|
||||
import requests
|
||||
|
||||
from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed
|
||||
from verl.utils.reward_score.math_reward import last_boxed_only_string, remove_boxed
|
||||
|
||||
BASE_URL = "http://localhost:30000"
|
||||
API_KEY = "EMPTY"
|
||||
|
@ -15,9 +15,9 @@
|
||||
|
||||
def reward_func(data_source, solution_str, ground_truth, extra_info=None):
|
||||
if data_source in ["Maxwell-Jia/AIME_2024", "opencompass/cnmo2024_en", "opencompass/cnmo2024_zh"]:
|
||||
from recipe.r1.tasks import math
|
||||
from recipe.r1.tasks import math_reward
|
||||
|
||||
return math.compute_score(solution_str, ground_truth)
|
||||
return math_reward.compute_score(solution_str, ground_truth)
|
||||
elif data_source == "Idavidrein/gpqa":
|
||||
from recipe.r1.tasks import gpqa
|
||||
|
||||
|
@ -45,9 +45,9 @@ def default_compute_score(
|
||||
|
||||
res = gsm8k.compute_score(solution_str, ground_truth)
|
||||
elif data_source in ["lighteval/MATH", "DigitalLearningGmbH/MATH-lighteval", "HuggingFaceH4/MATH-500"]:
|
||||
from . import math
|
||||
from . import math_reward
|
||||
|
||||
res = math.compute_score(solution_str, ground_truth)
|
||||
res = math_reward.compute_score(solution_str, ground_truth)
|
||||
# [Optional] Math-Verify Integration
|
||||
# For enhanced accuracy, consider utilizing Math-Verify (https://github.com/huggingface/Math-Verify).
|
||||
# Note: Math-Verify needs to be manually installed via pip: `pip install math-verify`.
|
||||
|
@ -12,7 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from .math import compute_score
|
||||
from .math_reward import compute_score
|
||||
|
||||
|
||||
def compute_score_batched(data_sources, solution_strs, ground_truths, extra_infos):
|
||||
|
Reference in New Issue
Block a user