> [!WARNING]
> We are [migrating to `ruff` as the linter and formatter and `pre-commit` as the managing tool](https://github.com/volcengine/verl/pull/1010).
>
> If your branch is based on a previous commit that still uses `yapf` and `pylint`, simply merging might trigger overwhelming linting errors, while **you are only expected to resolve the ones in the files related to your PR**.
>
> To resolve this issue, please try the following workaround so that the PR only includes the files you **really changed**:
>
> 1. In your branch, fix linting and formatting with `ruff`: `ruff check --fix && ruff format`
> 2. Squash into a single commit in a new branch: `git reset --soft $(git merge-base main HEAD) && git add -A && git commit -m "feat: ..."`
> 3. Merge with the latest main: `git merge origin/main`
> 4. Force push to your branch: `git push --force`

We add the reminder above to the documentation to tell contributors how to avoid overwhelming linting errors.
### Motivation

Following the discussion in #896, this PR migrates from `yapf` & `pylint` to `ruff` managed by `pre-commit`, which allows unified version control of the tooling and automatic hooks on commit.
### Summary

The `pre-commit` hook and CI
- check staged / committed files in commits / PRs
- check all files once a month (this is expected to fail until all files conform to the `ruff` standard)
### Explanation for the Failing CI Workflow `pre-commit`

For now, we only apply `ruff format` and `ruff check --fix` **without resolving all the errors**, since there are too many to fix at once, which causes the CI workflow `pre-commit` to fail.
Resolving the remaining errors is left to future commits. Specifically, the `pre-commit` hook and CI require every commit to fix its related files with `ruff`, so all files will be fixed incrementally.
### Reviewing Suggestion

The commit 3d93f51ba8 is huge because it applies `ruff` to all the files. To review the main changes, please check the commits before and after it.
110 lines · 3.5 KiB · Python
```python
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Offline evaluate the performance of a generated file using reward model and ground truth verifier.
The input is a parquet file that contains N generated sequences and (optional) the ground truth.
"""

from collections import defaultdict

import hydra
import numpy as np
import pandas as pd
import ray
from tqdm import tqdm

from verl.utils.fs import copy_to_local


def get_custom_reward_fn(config):
    import importlib.util
    import os

    reward_fn_config = config.get("custom_reward_function") or {}
    file_path = reward_fn_config.get("path")
    if not file_path:
        return None

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Reward function file '{file_path}' not found.")

    spec = importlib.util.spec_from_file_location("custom_module", file_path)
    module = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except Exception as e:
        raise RuntimeError(f"Error loading module from '{file_path}': {e}")

    function_name = reward_fn_config.get("name")

    if not hasattr(module, function_name):
        raise AttributeError(f"Reward function '{function_name}' not found in '{file_path}'.")

    print(f"using customized reward function '{function_name}' from '{file_path}'")

    return getattr(module, function_name)


@ray.remote
def process_item(reward_fn, data_source, response_lst, reward_data):
    ground_truth = reward_data["ground_truth"]
    score_lst = [reward_fn(data_source, r, ground_truth) for r in response_lst]
    return data_source, np.mean(score_lst)


@hydra.main(config_path="config", config_name="evaluation", version_base=None)
def main(config):
    local_path = copy_to_local(config.data.path)
    dataset = pd.read_parquet(local_path)
    prompts = dataset[config.data.prompt_key]
    responses = dataset[config.data.response_key]
    data_sources = dataset[config.data.data_source_key]
    reward_model_data = dataset[config.data.reward_model_key]

    total = len(dataset)

    # Initialize Ray
    if not ray.is_initialized():
        ray.init()

    # evaluate test_score based on data source
    data_source_reward = defaultdict(list)
    compute_score = get_custom_reward_fn(config)

    # Create remote tasks
    remote_tasks = [
        process_item.remote(compute_score, data_sources[i], responses[i], reward_model_data[i]) for i in range(total)
    ]

    # Process results as they come in
    with tqdm(total=total) as pbar:
        while len(remote_tasks) > 0:
            # Use ray.wait to get completed tasks
            done_ids, remote_tasks = ray.wait(remote_tasks)
            for result_id in done_ids:
                data_source, score = ray.get(result_id)
                data_source_reward[data_source].append(score)
                pbar.update(1)

    metric_dict = {}
    for data_source, rewards in data_source_reward.items():
        metric_dict[f"test_score/{data_source}"] = np.mean(rewards)

    print(metric_dict)


if __name__ == "__main__":
    main()
```
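The script can load an optional custom scoring function via `get_custom_reward_fn`, which imports the Python file given by `custom_reward_function.path` and looks up the function named by `custom_reward_function.name`. Below is a minimal sketch of such a module; the file name `my_reward.py`, the function name, and the exact-match scoring rule are illustrative assumptions, and only the `(data_source, response, ground_truth)` call signature is dictated by `process_item` above.

```python
# my_reward.py -- illustrative sketch of a custom reward module (not part of the repo).
# Only the positional call signature (data_source, solution_str, ground_truth) is
# fixed by process_item() in the evaluation script; everything else is an assumption.


def compute_score(data_source: str, solution_str: str, ground_truth: str) -> float:
    """Return 1.0 for an exact match with the ground truth, else 0.0."""
    return float(solution_str.strip() == str(ground_truth).strip())
```

With such a file in place, one would presumably point the script at it through Hydra overrides such as `custom_reward_function.path=my_reward.py custom_reward_function.name=compute_score data.path=/path/to/generated.parquet`; the config keys mirror those read in `main` and `get_custom_reward_fn`, while the values are placeholders.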