mirror of
https://github.com/volcengine/verl.git
synced 2025-10-20 05:33:49 +08:00
### What does this PR do? Adds a new `AbstractRewardManager` class to codify the interface for a reward manager. ### Checklist Before Starting - [ ] Search for similar PRs. Paste at least one query link here: ... - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI) - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data` - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]` - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test` - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title. - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching` ### Test > For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc. ### API and Usage Example > Demonstrate how the API changes if any, and provide usage example(s) if possible. ```python # Add code snippet or script demonstrating how to use this ``` ### Design & Code Changes > Demonstrate the high-level design if this PR is complex, and list the specific changes. ### Checklist Before Submitting > [!IMPORTANT] > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. - [x ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md). - [ x] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` - [x ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). 
- [ x] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... - [ x] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)
114 lines
3.1 KiB
TOML
114 lines
3.1 KiB
TOML
# -------------------------------
# build-system
# -------------------------------
# Build backend and the packages installed into the build environment.
[build-system]
requires = [
    "setuptools>=61.0",
    "wheel",
]
build-backend = "setuptools.build_meta"

# -------------------------------
# project (PEP 621 metadata)
# -------------------------------
[project]
name = "verl"
# Fields listed as "dynamic" are filled in by the build backend instead of being
# written here. The version is read from the file "verl/version/version" via the
# [tool.setuptools.dynamic] section below.
# NOTE(review): no source for "dependencies", "optional-dependencies", "authors"
# or "urls" is visible in this file; presumably setup.py provides them -- confirm.
dynamic = ["version", "dependencies", "optional-dependencies", "authors", "urls"]

description = "verl: Volcano Engine Reinforcement Learning for LLM"
license = { text = "Apache-2.0" }  # given as inline text rather than a license file reference
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.10"

# -------------------------------
# tool.ruff - Linting configuration
# -------------------------------
[tool.ruff]
# Note: the formatter tries to keep lines within line-length, but it is not a
# hard upper bound; formatted lines may still exceed it.
line-length = 120
exclude = [
    "tests/workers/rollout/test_sglang_async_rollout_sf_tools.py",
    "scripts/legacy_model_merger.py",
]

[tool.ruff.lint]
isort = { known-first-party = ["verl"] }
# c.f. https://github.com/vllm-project/vllm/blob/ce8d6b75fc0586045df75ee1568a5b5f9957251b/pyproject.toml
select = [
    # pycodestyle
    "E",
    # Pyflakes
    "F",
    # pyupgrade
    "UP",
    # flake8-bugbear
    "B",
    # isort
    "I",
    # flake8-logging-format
    "G",
]
ignore = [
    # star imports
    "F405",
    "F403",
    # lambda expression assignment
    "E731",
    # Loop control variable not used within loop body
    "B007",
    # f-string format
    "UP032",
    # `.log()` statement uses f-string
    "G004",
    # X | None for type annotations
    "UP045",
    # deprecated import
    "UP035",
]

# -------------------------------
# tool.mypy - typechecking config
# -------------------------------
[tool.mypy]
pretty = true
ignore_missing_imports = true
explicit_package_bases = true
follow_imports = "skip"

# Blanket silence: errors are suppressed everywhere by default; the
# [[tool.mypy.overrides]] entry turns reporting back on for selected modules.
ignore_errors = true

# Modules listed here opt back in to error reporting, overriding the blanket
# `ignore_errors = true` set in [tool.mypy].
[[tool.mypy.overrides]]
module = [
    "verl.trainer.config.algorithm",
    "verl.trainer.ppo.core_algos",
    "verl.trainer.ppo.reward",
    "verl.workers.reward_manager",
    "verl.workers.reward_manager.*",
]
ignore_errors = false

# -------------------------------
# tool.setuptools - Additional config
# -------------------------------
[tool.setuptools]
# Let setuptools attempt to include all relevant files in package_data
# automatically; this corresponds to `include_package_data=True` in setup.py.
include-package-data = true

# The version is read from the file 'verl/version/version'.
[tool.setuptools.dynamic]
version = { file = "verl/version/version" }

# Mirrors `package_dir={'': '.'}`: the empty (root) package name maps to the
# current directory (`.`).
[tool.setuptools.package-dir]
"" = "."

# Non-Python data files shipped inside the `verl` package: the version file and
# the YAML configs directly under trainer/config and one directory level below it.
# Rough setup.py equivalent:
# package_data={'verl': ['version/*', 'trainer/config/*.yaml', 'trainer/config/*/*.yaml']}
[tool.setuptools.package-data]
verl = [
    "version/*",
    "trainer/config/*.yaml",
    "trainer/config/*/*.yaml",
]