mirror of
https://github.com/volcengine/verl.git
synced 2025-10-20 21:53:50 +08:00
[doc, ci] fix: fix sandbox doc and enhance CI trigger filter and doc error checking (#2267)
### What does this PR do? - fix sandbox doc - enhance CI trigger filter and doc error checking - add a rule to check PR description ### Checklist Before Starting - [x] Search for similar PRs. Paste at least one query link here: ... - [x] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI) - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data` - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]` - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test` - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title. - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching` ### Checklist Before Submitting > [!IMPORTANT] > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. - [x] Read the [Contribute Guide](https://github.com/volcengine/verl?tab=readme-ov-file#contribution-guide). - [x] Apply [pre-commit checks](https://github.com/volcengine/verl?tab=readme-ov-file#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` - [x] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). - [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... - [x] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ).
This commit is contained in:
6
.github/workflows/check-pr-title.yml
vendored
6
.github/workflows/check-pr-title.yml
vendored
@ -50,3 +50,9 @@ jobs:
|
||||
run: python3 tests/special_sanity/check_pr_title.py
|
||||
env:
|
||||
PR_TITLE: ${{ github.event.pull_request.title }}
|
||||
|
||||
- name: Run PR description checker
|
||||
run: python3 tests/special_sanity/check_pr_description.py
|
||||
env:
|
||||
PR_TITLE: ${{ github.event.pull_request.title }}
|
||||
GITHUB_EVENT_PATH: ${{ github.event_path }}
|
||||
|
4
.github/workflows/doc.yml
vendored
4
.github/workflows/doc.yml
vendored
@ -94,3 +94,7 @@ jobs:
|
||||
echo "🚨 Sphinx doc build contained WARNING. Please check inline emphasis is correct. See _build/sphinx.log for details"
|
||||
exit 1
|
||||
fi
|
||||
if grep -q "WARNING: Definition list ends without a blank line" _build/sphinx.log; then
|
||||
echo "🚨 Sphinx doc build contained WARNING. Please check if the indentation is correct. See _build/sphinx.log for details"
|
||||
exit 1
|
||||
fi
|
||||
|
4
.github/workflows/e2e_dapo.yml
vendored
4
.github/workflows/e2e_dapo.yml
vendored
@ -42,7 +42,7 @@ on:
|
||||
- main
|
||||
- v0.*
|
||||
paths:
|
||||
- "**/*.py"
|
||||
- "verl/*.py"
|
||||
# Other entrypoints
|
||||
- "!examples/*trainer*"
|
||||
- "!tests/**"
|
||||
@ -50,6 +50,8 @@ on:
|
||||
- "!verl/trainer/fsdp_sft_trainer.py"
|
||||
# Megatron
|
||||
- "!verl/workers/**/megatron_*.py"
|
||||
- "!recipe/**"
|
||||
- "recipe/dapo"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
2
.github/workflows/e2e_eval_aime24.yml
vendored
2
.github/workflows/e2e_eval_aime24.yml
vendored
@ -51,6 +51,8 @@ on:
|
||||
- "!tests/**"
|
||||
- "!verl/trainer/main_*.py"
|
||||
- "!verl/trainer/fsdp_sft_trainer.py"
|
||||
- "!recipe/**"
|
||||
- "recipe/r1"
|
||||
- "!recipe/r1/README.md"
|
||||
pull_request:
|
||||
branches:
|
||||
|
5
.github/workflows/e2e_genrm_remote.yml
vendored
5
.github/workflows/e2e_genrm_remote.yml
vendored
@ -39,6 +39,11 @@ on:
|
||||
branches:
|
||||
- main
|
||||
- v0.*
|
||||
paths:
|
||||
- "**/*.py"
|
||||
- "tests/**"
|
||||
- "!recipe/**"
|
||||
- "recipe/genrm_remote"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
@ -3,7 +3,10 @@
|
||||
## Build the docs
|
||||
|
||||
```bash
|
||||
# Install dependencies.
|
||||
# If you want to view the auto-generated API docstrings, make sure verl is importable from the Python path. For instance, install verl via:
|
||||
# pip install -e "..[test]"
|
||||
|
||||
# Install dependencies needed for building docs.
|
||||
pip install -r requirements-docs.txt
|
||||
|
||||
# Build the docs.
|
||||
@ -16,4 +19,4 @@ make html
|
||||
```bash
|
||||
python -m http.server -d _build/html/
|
||||
```
|
||||
Launch your browser and navigate to http://localhost:8000 to view the documentation.
|
||||
Launch your browser and navigate to http://localhost:8000 to view the documentation. Alternatively, you can open the file `_build/html/index.html` directly in your local browser.
|
||||
|
@ -214,75 +214,79 @@ we provide e2e test scripts in `tests/special_e2e` folder, named `tests/special_
|
||||
|
||||
By setting 'trainer.rollout_data_dir', you can dump the rollout data to local disk. Here is a sample taken from the rollout data:
|
||||
|
||||
{
|
||||
"input": "
|
||||
|
||||
system\nYou are a math expert. You are given a question and you need to solve it step by step. Reasoning step by step before any tool call. You should use the `calc_gsm8k_reward` tool after step by step solving the question, before generate final answer at least once and refine your answer if necessary. Put your final answer in the format of `#### <answer>`.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"code_interpreter\", \"description\": \"A tool for executing code.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"code\": {\"type\": \"string\", \"description\": \"The code to execute.\", \"enum\": null}}, \"required\": [\"code\"]}, \"strict\": false}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n
|
||||
|
||||
user\nJohn gets a bonus that's the same percentage every year. Last year he made $100,000 and got a $10,000 bonus. This year he makes $200,000. How much will John make this year when adding both his total pay and bonus together? Let's think step by step and output the final answer after `####`.\n
|
||||
|
||||
assistant\n",
|
||||
"output": "assistant\n<think>\nOkay, let's see. John gets a bonus that's the same percentage every year. Last year, he made $100,000 and got a $10,000 bonus. This year, he makes $200,000. The question is asking how much he'll make this year when adding both his total pay and bonus together.\n\nFirst, I need to figure out the percentage bonus he gets. Since the bonus is the same percentage each year, if I can find that percentage based on last year's details, I can apply it to this year's earnings to find this year's bonus. Then, I'll add that bonus to his total pay this year.\n\nLast year, he earned $100,000 and got a $10,000 bonus. To find the percentage, I can divide the bonus by his total earnings. So, 10,000 divided by 100,000. Let me calculate that.\n\n10,000 / 100,000 equals 0.1, which is 10% as a percentage. So, his bonus is 10% of his pay each year.\n\nThis year, he makes $200,000. So, the bonus this year would be 10% of 200,000. Let me compute that using the code interpreter to ensure accuracy.\n
|
||||
|
||||
<tool_call>\n{\"name\": \"code_interpreter\", \"arguments\": {\"code\": \"total_pay_this_year = 200000\\nbonus_percentage = 10 / 100\\nbonus_this_year = total_pay_this_year * bonus_percentage\\ntotal_income_this_year = total_pay_this_year + bonus_this_year\\nprint(total_income_this_year)\", \"executes\": \"True\"}}\n</tool_call>\n
|
||||
|
||||
tool\n220000.0\n\n
|
||||
|
||||
assistant\n<think>\nSo the code shows that John's total income this year would be $220,000. This is consistent with the 10% bonus on last year's pay of $100,000, which was $10,000. Therefore, this year's bonus of $20,000 added to his total pay of $200,000 results in a total income of $220,000.\n\nI need to verify that there are no other factors at play. The problem states it's a constant percentage bonus each year, so there's no mention of changing percentages or other variables. Therefore, applying a straightforward percentage of 10% on this year's partial salary amount would result in $20,000. Adding that to his total pay gives $220,000. The calculation is correct and aligns with the question's parameters.\n</think>\n\n#### 220000.0",
|
||||
"score": 0,
|
||||
"step": 1
|
||||
}
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
"input": "
|
||||
|
||||
system\nYou are a math expert. You are given a question and you need to solve it step by step. Reasoning step by step before any tool call. You should use the `calc_gsm8k_reward` tool after step by step solving the question, before generate final answer at least once and refine your answer if necessary. Put your final answer in the format of `#### <answer>`.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"code_interpreter\", \"description\": \"A tool for executing code.\", \"parameters\": {\"type\": \"object\", \"properties\": {\"code\": {\"type\": \"string\", \"description\": \"The code to execute.\", \"enum\": null}}, \"required\": [\"code\"]}, \"strict\": false}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n
|
||||
|
||||
user\nJohn gets a bonus that's the same percentage every year. Last year he made $100,000 and got a $10,000 bonus. This year he makes $200,000. How much will John make this year when adding both his total pay and bonus together? Let's think step by step and output the final answer after `####`.\n
|
||||
|
||||
assistant\n",
|
||||
"output": "assistant\n<think>\nOkay, let's see. John gets a bonus that's the same percentage every year. Last year, he made $100,000 and got a $10,000 bonus. This year, he makes $200,000. The question is asking how much he'll make this year when adding both his total pay and bonus together.\n\nFirst, I need to figure out the percentage bonus he gets. Since the bonus is the same percentage each year, if I can find that percentage based on last year's details, I can apply it to this year's earnings to find this year's bonus. Then, I'll add that bonus to his total pay this year.\n\nLast year, he earned $100,000 and got a $10,000 bonus. To find the percentage, I can divide the bonus by his total earnings. So, 10,000 divided by 100,000. Let me calculate that.\n\n10,000 / 100,000 equals 0.1, which is 10% as a percentage. So, his bonus is 10% of his pay each year.\n\nThis year, he makes $200,000. So, the bonus this year would be 10% of 200,000. Let me compute that using the code interpreter to ensure accuracy.\n
|
||||
|
||||
<tool_call>\n{\"name\": \"code_interpreter\", \"arguments\": {\"code\": \"total_pay_this_year = 200000\\nbonus_percentage = 10 / 100\\nbonus_this_year = total_pay_this_year * bonus_percentage\\ntotal_income_this_year = total_pay_this_year + bonus_this_year\\nprint(total_income_this_year)\", \"executes\": \"True\"}}\n</tool_call>\n
|
||||
|
||||
tool\n220000.0\n\n
|
||||
|
||||
assistant\n<think>\nSo the code shows that John's total income this year would be $220,000. This is consistent with the 10% bonus on last year's pay of $100,000, which was $10,000. Therefore, this year's bonus of $20,000 added to his total pay of $200,000 results in a total income of $220,000.\n\nI need to verify that there are no other factors at play. The problem states it's a constant percentage bonus each year, so there's no mention of changing percentages or other variables. Therefore, applying a straightforward percentage of 10% on this year's partial salary amount would result in $20,000. Adding that to his total pay gives $220,000. The calculation is correct and aligns with the question's parameters.\n</think>\n\n#### 220000.0",
|
||||
"score": 0,
|
||||
"step": 1
|
||||
}
|
||||
|
||||
Here is the same sample in a readable format:
|
||||
|
||||
[system]
|
||||
.. code-block:: python
|
||||
|
||||
You are a math expert. You are given a question and you need to solve it step by step. Reasoning step by step before any tool call. You should use the `calc_gsm8k_reward` tool after step by step solving the question, before generate final answer at least once and refine your answer if necessary. Put your final answer in the format of `#### <answer>`.
|
||||
|
||||
# Tools
|
||||
|
||||
You may call one or more functions to assist with the user query.
|
||||
|
||||
You are provided with function signatures within <tools></tools> XML tags:
|
||||
<tools>
|
||||
{"type": "function", "function": {"name": "code_interpreter", "description": "A tool for executing code.", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": "The code to execute.", "enum": null}}, "required": ["code"]}, "strict": false}}
|
||||
</tools>
|
||||
|
||||
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||
<tool_call>
|
||||
{"name": <function-name>, "arguments": <args-json-object>}
|
||||
</tool_call>
|
||||
|
||||
[user]
|
||||
|
||||
John gets a bonus that's the same percentage every year. Last year he made $100,000 and got a $10,000 bonus. This year he makes $200,000. How much will John make this year when adding both his total pay and bonus together? Let's think step by step and output the final answer after `####`.
|
||||
|
||||
[assistant]
|
||||
|
||||
<think>
|
||||
Okay, let's see. John gets a bonus that's the same percentage every year. Last year, he made $100,000 and got a $10,000 bonus. This year, he makes $200,000. The question is asking how much he'll make this year when adding both his total pay and bonus together.
|
||||
|
||||
First, I need to figure out the percentage bonus he gets. Since the bonus is the same percentage each year, if I can find that percentage based on last year's details, I can apply it to this year's earnings to find this year's bonus. Then, I'll add that bonus to his total pay this year.
|
||||
|
||||
Last year, he earned $100,000 and got a $10,000 bonus. To find the percentage, I can divide the bonus by his total earnings. So, 10,000 divided by 100,000. Let me calculate that.
|
||||
|
||||
10,000 / 100,000 equals 0.1, which is 10% as a percentage. So, his bonus is 10% of his pay each year.
|
||||
|
||||
This year, he makes $200,000. So, the bonus this year would be 10% of 200,000. Let me compute that using the code interpreter to ensure accuracy.
|
||||
<tool_call>
|
||||
{"name": "code_interpreter", "arguments": {"code": "total_pay_this_year = 200000\nbonus_percentage = 10 / 100\nbonus_this_year = total_pay_this_year * bonus_percentage\ntotal_income_this_year = total_pay_this_year + bonus_this_year\nprint(total_income_this_year)", "executes": "True"}}
|
||||
</tool_call>
|
||||
|
||||
[tool]
|
||||
|
||||
220000.0
|
||||
|
||||
[assistant]
|
||||
|
||||
<think>
|
||||
So the code shows that John's total income this year would be $220,000. This is consistent with the 10% bonus on last year's pay of $100,000, which was $10,000. Therefore, this year's bonus of $20,000 added to his total pay of $200,000 results in a total income of $220,000.
|
||||
|
||||
I need to verify that there are no other factors at play. The problem states it's a constant percentage bonus each year, so there's no mention of changing percentages or other variables. Therefore, applying a straightforward percentage of 10% on this year's partial salary amount would result in $20,000. Adding that to his total pay gives $220,000. The calculation is correct and aligns with the question's parameters.
|
||||
</think>
|
||||
|
||||
#### 220000.0
|
||||
[system]
|
||||
|
||||
You are a math expert. You are given a question and you need to solve it step by step. Reasoning step by step before any tool call. You should use the `calc_gsm8k_reward` tool after step by step solving the question, before generate final answer at least once and refine your answer if necessary. Put your final answer in the format of `#### <answer>`.
|
||||
|
||||
# Tools
|
||||
|
||||
You may call one or more functions to assist with the user query.
|
||||
|
||||
You are provided with function signatures within <tools></tools> XML tags:
|
||||
<tools>
|
||||
{"type": "function", "function": {"name": "code_interpreter", "description": "A tool for executing code.", "parameters": {"type": "object", "properties": {"code": {"type": "string", "description": "The code to execute.", "enum": null}}, "required": ["code"]}, "strict": false}}
|
||||
</tools>
|
||||
|
||||
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||
<tool_call>
|
||||
{"name": <function-name>, "arguments": <args-json-object>}
|
||||
</tool_call>
|
||||
|
||||
[user]
|
||||
|
||||
John gets a bonus that's the same percentage every year. Last year he made $100,000 and got a $10,000 bonus. This year he makes $200,000. How much will John make this year when adding both his total pay and bonus together? Let's think step by step and output the final answer after `####`.
|
||||
|
||||
[assistant]
|
||||
|
||||
<think>
|
||||
Okay, let's see. John gets a bonus that's the same percentage every year. Last year, he made $100,000 and got a $10,000 bonus. This year, he makes $200,000. The question is asking how much he'll make this year when adding both his total pay and bonus together.
|
||||
|
||||
First, I need to figure out the percentage bonus he gets. Since the bonus is the same percentage each year, if I can find that percentage based on last year's details, I can apply it to this year's earnings to find this year's bonus. Then, I'll add that bonus to his total pay this year.
|
||||
|
||||
Last year, he earned $100,000 and got a $10,000 bonus. To find the percentage, I can divide the bonus by his total earnings. So, 10,000 divided by 100,000. Let me calculate that.
|
||||
|
||||
10,000 / 100,000 equals 0.1, which is 10% as a percentage. So, his bonus is 10% of his pay each year.
|
||||
|
||||
This year, he makes $200,000. So, the bonus this year would be 10% of 200,000. Let me compute that using the code interpreter to ensure accuracy.
|
||||
<tool_call>
|
||||
{"name": "code_interpreter", "arguments": {"code": "total_pay_this_year = 200000\nbonus_percentage = 10 / 100\nbonus_this_year = total_pay_this_year * bonus_percentage\ntotal_income_this_year = total_pay_this_year + bonus_this_year\nprint(total_income_this_year)", "executes": "True"}}
|
||||
</tool_call>
|
||||
|
||||
[tool]
|
||||
|
||||
220000.0
|
||||
|
||||
[assistant]
|
||||
|
||||
<think>
|
||||
So the code shows that John's total income this year would be $220,000. This is consistent with the 10% bonus on last year's pay of $100,000, which was $10,000. Therefore, this year's bonus of $20,000 added to his total pay of $200,000 results in a total income of $220,000.
|
||||
|
||||
I need to verify that there are no other factors at play. The problem states it's a constant percentage bonus each year, so there's no mention of changing percentages or other variables. Therefore, applying a straightforward percentage of 10% on this year's partial salary amount would result in $20,000. Adding that to his total pay gives $220,000. The calculation is correct and aligns with the question's parameters.
|
||||
</think>
|
||||
|
||||
#### 220000.0
|
||||
|
94
tests/special_sanity/check_pr_description.py
Normal file
94
tests/special_sanity/check_pr_description.py
Normal file
@ -0,0 +1,94 @@
|
||||
# Copyright 2025 Bytedance Ltd. and/or its affiliates
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import os
|
||||
|
||||
# Number of lines to check
|
||||
NUM_LINES = 5  # applied to both the template file and the PR body
|
||||
|
||||
|
||||
# Custom exception types for clear error handling
|
||||
class TemplateFileError(Exception):
    """Raised when the PR template file cannot be read."""
|
||||
|
||||
|
||||
class PRBodyLoadError(Exception):
    """Raised when the PR body cannot be read from the event payload."""
|
||||
|
||||
|
||||
class PRDescriptionError(Exception):
    """Raised when the PR description still matches the template placeholder."""
|
||||
|
||||
|
||||
# Path to the PR template file
|
||||
# Falls back to the current directory when GITHUB_WORKSPACE is unset.
template_file = os.path.join(os.getenv("GITHUB_WORKSPACE", "."), ".github", "PULL_REQUEST_TEMPLATE.md")
|
||||
|
||||
|
||||
def load_template(path):
    """
    Load the first NUM_LINES lines of the PR template file.

    Each returned line has its leading and trailing whitespace (including the
    line terminator) stripped. If the file holds fewer than NUM_LINES lines,
    only the lines that exist are returned.

    Args:
        path: Location of the PR template file; read as UTF-8.

    Returns:
        A list of at most NUM_LINES stripped strings.

    Raises:
        TemplateFileError: on any failure while opening or reading the file.
    """
    try:
        with open(path, encoding="utf-8") as f:
            # zip() asks range() first and stops as soon as it is exhausted,
            # so no more than NUM_LINES lines are pulled from the file.
            return [line.strip() for _, line in zip(range(NUM_LINES), f)]
    except Exception as e:
        raise TemplateFileError(f"Failed to read PR template (first {NUM_LINES} lines) at {path}: {e}") from e
|
||||
|
||||
|
||||
def load_pr_body(event_path):
    """
    Return the pull request description stored in a GitHub event payload.

    The JSON file at ``event_path`` is parsed and ``pull_request.body`` is
    looked up. A missing ``pull_request`` key, a missing ``body`` key, or a
    null/empty body all yield an empty string.

    Raises:
        PRBodyLoadError: on any failure while reading or parsing the payload.
    """
    try:
        with open(event_path, encoding="utf-8") as event_file:
            event = json.load(event_file)
        pull_request = event.get("pull_request", {})
        description = pull_request.get("body", "")
        # A JSON null body is normalised to an empty string.
        return description if description else ""
    except Exception as exc:
        raise PRBodyLoadError(f"Failed to read PR body from {event_path}: {exc}") from exc
|
||||
|
||||
|
||||
def check_pr_description(body, template_lines):
    """
    Reject a PR body whose opening lines are still the untouched template.

    The first NUM_LINES lines of ``body`` are whitespace-stripped and compared
    with ``template_lines``. When the two lists are equal a PRDescriptionError
    is raised; otherwise both lists are printed.
    """
    head = [line.strip() for line in body.splitlines(keepends=True)[:NUM_LINES]]
    if head != template_lines:
        print(head)
        print(template_lines)
        return
    raise PRDescriptionError("It looks like you haven't updated the '### What does this PR do?' section. Please replace the placeholder text with a concise description of what your PR does.")
|
||||
|
||||
|
||||
def main():
    """
    Run the PR description check against the current GitHub event.

    Reads the event payload path from the GITHUB_EVENT_PATH environment
    variable, loads the template lines and then the PR body, and compares
    them. Prints a success message when the check does not raise.

    Raises:
        OSError: if GITHUB_EVENT_PATH is unset or empty.
    """
    payload_path = os.environ.get("GITHUB_EVENT_PATH")
    if payload_path is None or payload_path == "":
        raise OSError("GITHUB_EVENT_PATH is not set.")

    # The template is loaded before the body, matching the original order.
    expected_lines = load_template(template_file)
    check_pr_description(load_pr_body(payload_path), expected_lines)

    print("✅ '### What does this PR do?' section has been filled out.")
|
||||
|
||||
|
||||
# Entry point: run the check only when executed as a script, not on import.
if __name__ == "__main__":
|
||||
main()
|
Reference in New Issue
Block a user