Mirror of https://github.com/volcengine/verl.git (synced 2025-10-20 13:43:50 +08:00)
[ci] feat: using local dataset to avoid network issue (#3533)
### What does this PR do?

- As title

### Checklist Before Starting

- [ ] Search for similar PRs. Paste at least one query link here: ...
- [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
  - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`
  - If this PR involves multiple modules, separate them with `,`, like `[megatron, fsdp, doc]`
  - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
  - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
  - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`

### Test

> For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc.

### API and Usage Example

> Demonstrate how the API changes if any, and provide usage example(s) if possible.

```python
# Add code snippet or script demonstrating how to use this
```

### Design & Code Changes

> Demonstrate the high-level design if this PR is complex, and list the specific changes.

### Checklist Before Submitting

> [!IMPORTANT]
> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.

- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)
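In practice the change touches two places: the example preprocessing scripts gain a `--local_dataset_path` flag, and the CI workflows call them with a dataset directory that is already on the runner (plus `HF_HUB_OFFLINE=1` for the model downloads). Below is a minimal sketch of the loading fallback the scripts adopt; the Hub id `openai/gsm8k` with the `main` config and the helper name `load_gsm8k` are illustrative assumptions, not taken from this diff.

```python
import argparse
from typing import Optional

import datasets


def load_gsm8k(local_dataset_path: Optional[str]) -> datasets.DatasetDict:
    """Load GSM8K from a local directory when one is provided, otherwise from the Hub."""
    if local_dataset_path is not None:
        # e.g. ${HOME}/models/hf_data/gsm8k, pre-populated on the CI runner
        return datasets.load_dataset(local_dataset_path)
    return datasets.load_dataset("openai/gsm8k", "main")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dataset_path", default=None)
    args = parser.parse_args()
    dataset = load_gsm8k(args.local_dataset_path)
    print({split: len(dataset[split]) for split in dataset})
```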
.github/workflows/checkpoint_converter.yml (6 changed lines)
@@ -92,8 +92,8 @@ jobs:
 pip3 install -e .[test]
 - name: Download Model to Use
 run: |
- huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
- huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct --local-dir ${HOME}/models/deepseek-ai/deepseek-coder-1.3b-instruct
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
+ # huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct --local-dir ${HOME}/models/deepseek-ai/deepseek-coder-1.3b-instruct
 export HF_HUB_OFFLINE=1
 - name: Running Huggingface to Megatron dist_ckpt converter (Qwen/Qwen2.5-0.5B)
 run: |
@@ -127,7 +127,7 @@ jobs:
 pip3 install -e .[test]
 - name: Download Model to Use
 run: |
- huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat --local-dir ${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat
+ # huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat --local-dir ${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat
 export HF_HUB_OFFLINE=1
 - name: Running Huggingface to Megatron dist_ckpt CPU converter (Qwen/Qwen1.5-MoE-A2.7B-Chat)
 run: |
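With the `huggingface-cli download` calls commented out, these converter jobs rely on models that are already cached on the runner, and `HF_HUB_OFFLINE=1` turns any accidental Hub request into an immediate error instead of a hanging download. The following sketch is not part of the PR; it only illustrates, assuming the same local directory layout as the workflow, what offline resolution looks like from Python.

```python
import os

from transformers import AutoConfig

# Force huggingface_hub/transformers to resolve everything from local files.
os.environ["HF_HUB_OFFLINE"] = "1"

local_dir = os.path.expanduser("~/models/Qwen/Qwen2.5-0.5B")
config = AutoConfig.from_pretrained(local_dir, local_files_only=True)
print(config.model_type)
```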
.github/workflows/e2e_dapo.yml (43 changed lines)
@@ -83,9 +83,29 @@ concurrency:
 permissions:
 contents: read
 
+ env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
 jobs:
+ setup:
+ if: github.repository_owner == 'volcengine'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
 e2e_dapo:
- runs-on: [L20x8]
+ needs: setup
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
 timeout-minutes: 40 # Increase this timeout value as needed
 env:
 HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
@@ -93,9 +113,6 @@ jobs:
 NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
 HF_ENDPOINT: "https://hf-mirror.com"
 HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
- container:
- image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
- options: --gpus all --shm-size=10g
 steps:
 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 with:
@@ -105,8 +122,24 @@ jobs:
 pip3 install --no-deps -e .[test,gpu]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running the E2E test with the DAPO algorithm
 run: |
 ray stop --force
 bash tests/special_e2e/run_dapo.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [
+ setup,
+ e2e_dapo
+ ]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_genrm_remote.yml (43 changed lines)
@@ -76,9 +76,29 @@ concurrency:
 permissions:
 contents: read
 
+ env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
 jobs:
+ setup:
+ if: github.repository_owner == 'volcengine'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
 e2e_genrm_remote:
- runs-on: [L20x8]
+ needs: setup
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
 timeout-minutes: 40 # Increase this timeout value as needed
 env:
 HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
@@ -86,9 +106,6 @@ jobs:
 NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
 HF_ENDPOINT: "https://hf-mirror.com"
 HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
- container:
- image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
- options: --gpus all --shm-size=10g
 steps:
 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 with:
@@ -98,8 +115,24 @@ jobs:
 pip3 install --no-deps -e .[test,gpu]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running the E2E test with the Generative Reward Model
 run: |
 ray stop --force
 bash tests/special_e2e/run_genrm_remote.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [
+ setup,
+ e2e_genrm_remote
+ ]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_one_step_off_policy.yml (51 changed lines)
@@ -83,10 +83,30 @@ concurrency:
 permissions:
 contents: read
 
+ env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
 jobs:
+ setup:
+ if: github.repository_owner == 'volcengine'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
 # Test FSDP2 strategy
 e2e_one_step_off_policy_fsdp2:
- runs-on: [L20x8]
+ needs: setup
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
 timeout-minutes: 10 # Increase timeout for async training
 env:
 HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
@@ -95,9 +115,6 @@ jobs:
 HF_ENDPOINT: "https://hf-mirror.com"
 HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
 ACTOR_STRATEGY: "fsdp2"
- container:
- image: verlai/verl:app-verl0.4-vllm0.8.5-mcore0.12.1
- options: --gpus all --shm-size=10g
 steps:
 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 with:
@@ -107,7 +124,7 @@ jobs:
 pip3 install --no-deps -e .[test,gpu]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running the E2E test with one_step_off_policy algorithm (FSDP2)
 run: |
 ray stop --force
@@ -115,7 +132,8 @@ jobs:
 
 # Test Megatron strategy
 e2e_one_step_off_policy_megatron:
- runs-on: [L20x8]
+ needs: setup
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
 timeout-minutes: 10 # Increase timeout for async training
 env:
 HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
@@ -124,9 +142,6 @@ jobs:
 HF_ENDPOINT: "https://hf-mirror.com"
 HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
 ACTOR_STRATEGY: "megatron"
- container:
- image: verlai/verl:app-verl0.4-vllm0.8.5-mcore0.12.1
- options: --gpus all --shm-size=10g
 steps:
 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 with:
@@ -136,9 +151,25 @@ jobs:
 pip3 install --no-deps -e .[test,gpu]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running the E2E test with one_step_off_policy algorithm (Megatron)
 run: |
 ray stop --force
 bash tests/special_e2e/run_one_step_off_policy.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [
+ setup,
+ e2e_one_step_off_policy_fsdp2,
+ e2e_one_step_off_policy_megatron
+ ]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_ppo_trainer.yml (286 changed lines)
@ -77,289 +77,3 @@ jobs:
|
||||
with:
|
||||
extra_args: "" # Overriding default "--all-files"
|
||||
|
||||
e2e_ppo_trainer_vllm:
|
||||
runs-on: [L20x8]
|
||||
timeout-minutes: 60 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
container:
|
||||
image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
|
||||
options: --gpus all --shm-size=10g
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install --no-deps -e .[test,vllm]
|
||||
- name: Prepare GSM8K dataset
|
||||
run: |
|
||||
ray stop --force
|
||||
python3 examples/data_preprocess/gsm8k.py
|
||||
# HF sanity
|
||||
# - name: Running GSM8K E2E training tests on 1 L20 GPU with hf for sanity
|
||||
# run: |
|
||||
# ray stop --force
|
||||
# bash tests/special_e2e/ppo_trainer/run_single_gpu.sh
|
||||
# # HF sanity
|
||||
# - name: Running GSM8K E2E training tests on 1 L20 GPU with engine interface for sanity.
|
||||
# run: |
|
||||
# ray stop --force
|
||||
# bash tests/special_e2e/ppo_trainer/run_single_gpu_with_engine.sh
|
||||
# Function RM
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP_SIZE=8)
|
||||
run: |
|
||||
ray stop --force
|
||||
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
|
||||
run: |
|
||||
ray stop --force
|
||||
RESUME_MODE=auto VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test merging FSDP checkpoints (Qwen Actor)
|
||||
run: |
|
||||
exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp-size8"
|
||||
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
|
||||
run: |
|
||||
ray stop --force
|
||||
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test merging DDP+FSDP checkpoints (Qwen Actor)
|
||||
run: |
|
||||
exp_name="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4"
|
||||
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP2)
|
||||
run: |
|
||||
ray stop --force
|
||||
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8" STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test merging FSDP2 checkpoints (Qwen Actor)
|
||||
run: |
|
||||
exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8"
|
||||
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E without rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
RM_PAD=False bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=remax USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using customized reward function
|
||||
run: |
|
||||
ray stop --force
|
||||
CUSTOM_REWARD_FN=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with in-reward kl and kl loss
|
||||
run: |
|
||||
ray stop --force
|
||||
USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
# LoRA tests
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True TOTAL_TRAIN_STEPS=1 SAVE_FREQ=1 FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test GRPO LoRA checkpoints merging function
|
||||
run: |
|
||||
export EXP_NAME="qwen2.5-0.5b-function-reward-minimal"
|
||||
ls checkpoints/verl-test/${EXP_NAME}/global_step_1/actor
|
||||
cat checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface/config.json
|
||||
python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/ --target_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon with fsdp2
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
# Model RM
|
||||
- name: Running GRPO GSM8K E2E training tests with FSDP on 8 L20 GPUs (DeepSeek)
|
||||
run: |
|
||||
ray stop --force
|
||||
MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm
|
||||
run: |
|
||||
ray stop --force
|
||||
bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E without rmpad using model rm
|
||||
run: |
|
||||
ray stop --force
|
||||
RM_PAD=False bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm and ulysses sp=2
|
||||
run: |
|
||||
ray stop --force
|
||||
SP_SIZE=2 bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm and dynamic batch size
|
||||
run: |
|
||||
ray stop --force
|
||||
SEQ_BALANCE=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm with Liger Kernel enabled
|
||||
run: |
|
||||
ray stop --force
|
||||
LIGER=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNELS=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNEL=True FUSED_KERNEL_BACKEND=triton bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E training tests on vllm async
|
||||
run: |
|
||||
ray stop --force
|
||||
export VLLM_USE_V1=1
|
||||
ray start --head
|
||||
TOTAL_TRAIN_STEPS=2 ENGINE=vllm ROLLOUT_MODE=async bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
e2e_ppo_trainer_vllm_vlm:
|
||||
runs-on: [L20x8]
|
||||
needs: pre_commit_for_ppo
|
||||
timeout-minutes: 40 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
container:
|
||||
image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
|
||||
options: --gpus all --shm-size=50g # Visual dataloader requires large memory
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install --no-deps -e .[test,gpu,vllm,geo,trl]
|
||||
pip install "transformers[hf_xet]==4.54.0"
|
||||
# Geo3k
|
||||
- name: Prepare GEO3K dataset
|
||||
run: |
|
||||
python3 examples/data_preprocess/geo3k.py
|
||||
- name: Running GEO3K VLM GRPO E2E training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
SP_SIZE=2 \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
- name: Running GEO3K VLM PPO E2E training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
SP_SIZE=2 \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GEO3K VLM GRPO E2E lora training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
SP_SIZE=2 \
|
||||
LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
e2e_ppo_trainer_sglang:
|
||||
runs-on: [L20x8]
|
||||
needs: pre_commit_for_ppo
|
||||
timeout-minutes: 40 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
container:
|
||||
image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
|
||||
options: --gpus all --shm-size=10g
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install -e .[test,gpu,sglang]
|
||||
- name: Prepare gsm8k dataset
|
||||
run: |
|
||||
ray stop --force
|
||||
python3 examples/data_preprocess/gsm8k.py
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt
|
||||
run: |
|
||||
ray stop --force
|
||||
ENGINE=sglang bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on sglang async
|
||||
run: |
|
||||
ray stop --force
|
||||
TOTAL_TRAIN_STEPS=2 ENGINE=sglang ROLLOUT_MODE=async bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
e2e_ppo_trainer_sglang_vlm:
|
||||
runs-on: [L20x8]
|
||||
needs: pre_commit_for_ppo
|
||||
timeout-minutes: 60 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
container:
|
||||
image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
|
||||
options: --gpus all --shm-size=50g # Visual dataloader requires large memory
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install -e .[test,geo,gpu,sglang] --no-deps
|
||||
pip install "transformers[hf_xet]==4.54.0"
|
||||
# Geo3k
|
||||
- name: Prepare GEO3K dataset
|
||||
run: |
|
||||
ray stop --force
|
||||
python3 examples/data_preprocess/geo3k.py
|
||||
- name: Running GEO3K VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
|
||||
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GEO3K VLM E2E with rmpad using torch fused kernel (Qwen2.5-VL)
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNELS=True TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
|
||||
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GEO3K VLM E2E with rmpad using triton fused kernel (Qwen2.5-VL)
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton \
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
|
||||
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
.github/workflows/e2e_ppo_trainer_megatron_sglang.yml
@@ -54,6 +54,7 @@ on:
 - v0.*
 paths:
 - "**/*.py"
+ - ".github/workflows/e2e_ppo_trainer_megatron_sglang.yml"
 # Other entrypoints
 - "!docker/**"
 # Docs
@@ -69,7 +70,6 @@ on:
 - "!verl/workers/**/*dp_*.py"
 # Entrypoints
 - "verl/worksers/rollout/sglang_rollout/*"
- - ".github/workflows/e2e_ppo_trainer_megatron_sglang.yml"
 - "examples/data_preprocess/gsm8k.py"
 - "examples/data_preprocess/geo3k.py"
 - "tests/special_e2e/run_ppo_trainer_megatron.sh"
@@ -124,7 +124,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
 run: |
 ray stop --force
@@ -173,7 +173,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with tie-embedding Megatron (Qwen) with train tp > infer tp
 run: |
 ray stop --force
@@ -205,10 +205,10 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Download Model to Use
 run: |
- huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
 export HF_HUB_OFFLINE=1
 - name: Prepare dist_ckpt of Qwen2.5-0.5B, uneven layer distribution only supports dist_ckpt
 run: |
@@ -247,7 +247,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
 run: |
 ray stop --force
@@ -280,7 +280,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
 run: |
 ray stop --force
@@ -314,7 +314,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare Geo3k dataset
 run: |
- python3 examples/data_preprocess/geo3k.py
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
 - name: Prepare dist_ckpt of Qwen2.5-VL-3B, only supports dist_ckpt
 run: |
 python3 scripts/converter_hf_to_mcore.py --hf_model_path ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct --output_path checkpoints/verl-test/qwen2.5-vl-3b-megatron
@ -326,6 +326,92 @@ jobs:
|
||||
run: |
|
||||
rm -rf checkpoints
|
||||
|
||||
e2e_ppo_trainer_sglang:
|
||||
needs: setup
|
||||
runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
|
||||
timeout-minutes: 40 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install -e .[test,gpu,sglang]
|
||||
- name: Prepare gsm8k dataset
|
||||
run: |
|
||||
ray stop --force
|
||||
python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt
|
||||
run: |
|
||||
ray stop --force
|
||||
ENGINE=sglang bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on sglang async
|
||||
run: |
|
||||
ray stop --force
|
||||
TOTAL_TRAIN_STEPS=2 ENGINE=sglang ROLLOUT_MODE=async bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
e2e_ppo_trainer_sglang_vlm:
|
||||
needs: setup
|
||||
runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
|
||||
timeout-minutes: 60 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install -e .[test,geo,gpu,sglang] --no-deps
|
||||
pip install "transformers[hf_xet]==4.54.0"
|
||||
# Geo3k
|
||||
- name: Prepare GEO3K dataset
|
||||
run: |
|
||||
ray stop --force
|
||||
python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
|
||||
- name: Running GEO3K VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
|
||||
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GEO3K VLM E2E with rmpad using torch fused kernel (Qwen2.5-VL)
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNELS=True TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
|
||||
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GEO3K VLM E2E with rmpad using triton fused kernel (Qwen2.5-VL)
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton \
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
|
||||
ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
|
||||
cleanup:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
@@ -337,6 +423,8 @@ jobs:
 e2e_ppo_trainer_megatron-deepseek-override-transformer-config,
 e2e_ppo_trainer_megatron-moe-expert-parallel,
 e2e_ppo_trainer_megatron-qwen2_5vl-3b,
+ e2e_ppo_trainer_sglang,
+ e2e_ppo_trainer_sglang_vlm
 ]
 if: always()
 steps:
.github/workflows/e2e_ppo_trainer_megatron_vllm.yml (215 changed lines)
@@ -54,6 +54,7 @@ on:
 - v0.*
 paths:
 - "**/*.py"
+ - ".github/workflows/e2e_ppo_trainer_megatron_vllm.yml"
 # Other entrypoints
 - "!docker/**"
 # Docs
@@ -68,7 +69,6 @@ on:
 # FSDP
 - "!verl/workers/**/*dp_*.py"
 # Entrypoints
- - ".github/workflows/e2e_ppo_trainer_megatron_vllm.yml"
 - "examples/data_preprocess/gsm8k.py"
 - "examples/data_preprocess/geo3k.py"
 - "tests/special_e2e/run_ppo_trainer_megatron.sh"
@@ -123,7 +123,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use mbridge e2e to pre-load and save (Deepseek)
 run: |
 ray stop --force
@@ -175,7 +175,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) with validation and saving
 run: |
 ray stop --force
@@ -212,7 +212,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with tie-embedding Megatron (Qwen) with train tp > infer tp
 run: |
 ray stop --force
@@ -243,10 +243,10 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Download Model to Use
 run: |
- huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
 export HF_HUB_OFFLINE=1
 - name: Prepare dist_ckpt of Qwen2.5-0.5B, uneven layer distribution only supports dist_ckpt
 run: |
@@ -285,7 +285,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
 run: |
 ray stop --force
@@ -318,7 +318,7 @@ jobs:
 pip3 install mbridge
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
 run: |
 ray stop --force
@@ -350,7 +350,7 @@ jobs:
 pip3 install --no-deps -e .[test]
 - name: Prepare Geo3k dataset
 run: |
- python3 examples/data_preprocess/geo3k.py
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
 - name: Prepare dist_ckpt of Qwen2.5-VL-3B, only supports dist_ckpt
 run: |
 python3 scripts/converter_hf_to_mcore.py --hf_model_path ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct --output_path checkpoints/verl-test/qwen2.5-vl-3b-megatron
@ -366,6 +366,201 @@ jobs:
|
||||
run: |
|
||||
rm -rf checkpoints
|
||||
|
||||
e2e_ppo_trainer_vllm:
|
||||
needs: setup
|
||||
runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
|
||||
timeout-minutes: 60 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install --no-deps -e .[test,vllm]
|
||||
- name: Prepare GSM8K dataset
|
||||
run: |
|
||||
ray stop --force
|
||||
python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
|
||||
# HF sanity
|
||||
# - name: Running GSM8K E2E training tests on 1 L20 GPU with hf for sanity
|
||||
# run: |
|
||||
# ray stop --force
|
||||
# bash tests/special_e2e/ppo_trainer/run_single_gpu.sh
|
||||
# # HF sanity
|
||||
# - name: Running GSM8K E2E training tests on 1 L20 GPU with engine interface for sanity.
|
||||
# run: |
|
||||
# ray stop --force
|
||||
# bash tests/special_e2e/ppo_trainer/run_single_gpu_with_engine.sh
|
||||
# Function RM
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP_SIZE=8)
|
||||
run: |
|
||||
ray stop --force
|
||||
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
|
||||
run: |
|
||||
ray stop --force
|
||||
RESUME_MODE=auto VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test merging FSDP checkpoints (Qwen Actor)
|
||||
run: |
|
||||
exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp-size8"
|
||||
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
|
||||
run: |
|
||||
ray stop --force
|
||||
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test merging DDP+FSDP checkpoints (Qwen Actor)
|
||||
run: |
|
||||
exp_name="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4"
|
||||
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP2)
|
||||
run: |
|
||||
ray stop --force
|
||||
VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8" STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test merging FSDP2 checkpoints (Qwen Actor)
|
||||
run: |
|
||||
exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8"
|
||||
python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E without rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
RM_PAD=False bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=remax USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using customized reward function
|
||||
run: |
|
||||
ray stop --force
|
||||
CUSTOM_REWARD_FN=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with in-reward kl and kl loss
|
||||
run: |
|
||||
ray stop --force
|
||||
USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
# LoRA tests
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True TOTAL_TRAIN_STEPS=1 SAVE_FREQ=1 FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Test GRPO LoRA checkpoints merging function
|
||||
run: |
|
||||
export EXP_NAME="qwen2.5-0.5b-function-reward-minimal"
|
||||
ls checkpoints/verl-test/${EXP_NAME}/global_step_1/actor
|
||||
cat checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface/config.json
|
||||
python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/ --target_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface
|
||||
- name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon with fsdp2
|
||||
run: |
|
||||
ray stop --force
|
||||
ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
# Model RM
|
||||
- name: Running GRPO GSM8K E2E training tests with FSDP on 8 L20 GPUs (DeepSeek)
|
||||
run: |
|
||||
ray stop --force
|
||||
MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm
|
||||
run: |
|
||||
ray stop --force
|
||||
bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E without rmpad using model rm
|
||||
run: |
|
||||
ray stop --force
|
||||
RM_PAD=False bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm and ulysses sp=2
|
||||
run: |
|
||||
ray stop --force
|
||||
SP_SIZE=2 bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm and dynamic batch size
|
||||
run: |
|
||||
ray stop --force
|
||||
SEQ_BALANCE=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm with Liger Kernel enabled
|
||||
run: |
|
||||
ray stop --force
|
||||
LIGER=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNELS=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
|
||||
run: |
|
||||
ray stop --force
|
||||
FUSED_KERNEL=True FUSED_KERNEL_BACKEND=triton bash tests/special_e2e/ppo_trainer/run_model_reward.sh
|
||||
- name: Running GSM8K E2E training tests on vllm async
|
||||
run: |
|
||||
ray stop --force
|
||||
export VLLM_USE_V1=1
|
||||
ray start --head
|
||||
TOTAL_TRAIN_STEPS=2 ENGINE=vllm ROLLOUT_MODE=async bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
e2e_ppo_trainer_vllm_vlm:
|
||||
needs: setup
|
||||
runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
|
||||
timeout-minutes: 40 # Increase this timeout value as needed
|
||||
env:
|
||||
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
|
||||
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
|
||||
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
|
||||
HF_ENDPOINT: "https://hf-mirror.com"
|
||||
HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
|
||||
container:
|
||||
image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
|
||||
options: --gpus all --shm-size=50g # Visual dataloader requires large memory
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Install the current repository
|
||||
run: |
|
||||
pip3 install --no-deps -e .[test,gpu,vllm,geo,trl]
|
||||
pip install "transformers[hf_xet]==4.54.0"
|
||||
# Geo3k
|
||||
- name: Prepare GEO3K dataset
|
||||
run: |
|
||||
python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
|
||||
- name: Running GEO3K VLM GRPO E2E training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
SP_SIZE=2 \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
- name: Running GEO3K VLM PPO E2E training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
SP_SIZE=2 \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
- name: Running GEO3K VLM GRPO E2E lora training tests on 8 L20 GPUs with rmpad using function rm
|
||||
run: |
|
||||
ray stop --force
|
||||
TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
|
||||
MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
|
||||
MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
|
||||
ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
|
||||
SP_SIZE=2 \
|
||||
LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \
|
||||
bash tests/special_e2e/ppo_trainer/run_function_reward.sh
|
||||
|
||||
|
||||
cleanup:
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
@@ -378,6 +573,8 @@ jobs:
 e2e_ppo_trainer_megatron-deepseek-override-transformer-config,
 e2e_ppo_trainer_megatron-moe-expert-parallel,
 e2e_ppo_trainer_megatron-qwen2_5vl-3b,
+ e2e_ppo_trainer_vllm,
+ e2e_ppo_trainer_vllm_vlm
 ]
 if: always()
 steps:
.github/workflows/e2e_sft.yml (4 changed lines)
@@ -109,7 +109,7 @@ jobs:
 - name: Prepare gsm8k dataset
 run: |
 ray stop --force
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm
 run: |
 ray stop --force
@@ -142,7 +142,7 @@ jobs:
 - name: Prepare gsm8k dataset
 run: |
 ray stop --force
- python3 examples/data_preprocess/gsm8k_multiturn_sft.py
+ python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running GSM8K E2E training tests with multiturn and various configs and compare results
 run: |
 bash tests/special_e2e/sft/test_sft_engine_all.sh
.github/workflows/e2e_spin.yml (43 changed lines)
@@ -52,14 +52,34 @@ on:
 permissions:
 contents: read
 
+ env:
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
+
 # Cancel jobs on the same ref if a new one is triggered
 concurrency:
 group: ${{ github.workflow }}-${{ github.ref }}
 cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 jobs:
+ setup:
+ if: github.repository_owner == 'volcengine'
+ runs-on: ubuntu-latest
+ outputs:
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
+ steps:
+ - uses: actions/checkout@v4
+ - id: create-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "create"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-image: "${{ env.IMAGE }}"
+
 e2e_spin:
- runs-on: [L20x8]
+ needs: setup
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
 timeout-minutes: 40 # Increase this timeout value as needed
 env:
 HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
@@ -67,9 +87,6 @@ jobs:
 NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
 HF_ENDPOINT: "https://hf-mirror.com"
 HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
- container:
- image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
- options: --gpus all --shm-size=10g
 steps:
 - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 with:
@@ -79,8 +96,24 @@ jobs:
 pip3 install -e .[test,gpu,sglang]
 - name: Prepare GSM8K dataset
 run: |
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Running the E2E test with the spin algorithm
 run: |
 ray stop --force
 bash tests/special_e2e/run_spin.sh
+
+ cleanup:
+ runs-on: ubuntu-latest
+ needs:
+ [
+ setup,
+ e2e_spin
+ ]
+ if: always()
+ steps:
+ - id: destroy-runner
+ uses: volcengine/vemlp-github-runner@v1
+ with:
+ mode: "destroy"
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/sgl.yml (8 changed lines)
@@ -119,14 +119,14 @@ jobs:
 pip3 install -e .[test]
 - name: Download Model to Use
 run: |
- huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
- huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
- huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
+ # huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
+ # huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
 export HF_HUB_OFFLINE=1
 - name: Prepare gsm8k dataset
 run: |
 ray stop --force
- python3 examples/data_preprocess/gsm8k.py
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 - name: Test the latest SGLang Rollout async with agent loop
 run: |
 huggingface-cli download verl-team/gsm8k-v0.4.1 --repo-type dataset --local-dir ~/verl-data/gsm8k
.github/workflows/vllm.yml (10 changed lines)
@ -112,15 +112,15 @@ jobs:
          pip3 install -e .[test]
      - name: Download Model to Use
        run: |
          huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
          huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
          huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
          huggingface-cli download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
          # huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
          # huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
          # huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
          # huggingface-cli download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
          export HF_HUB_OFFLINE=1
      - name: Prepare gsm8k dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/gsm8k.py
          python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
      - name: Test the latest vLLM Rollout async with agent loop
        run: |
          ROLLOUT_NAME=vllm pytest -svvv tests/experimental/agent_loop
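Both the SGLang and vLLM workflows now prepare GSM8K the same way: the preprocessing script is pointed at a pre-staged raw copy instead of fetching `openai/gsm8k` from the Hub. A usage sketch of the two modes follows; the local path mirrors what the workflows above pass, and defaults may differ outside CI.

```bash
# Offline CI path: read the raw dataset from a local copy staged beforehand.
python3 examples/data_preprocess/gsm8k.py --local_dataset_path "${HOME}/models/hf_data/gsm8k"

# Default path when network access is available: fetch openai/gsm8k from the Hub.
python3 examples/data_preprocess/gsm8k.py
```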
@ -24,14 +24,26 @@ from verl.utils.hdfs_io import copy, makedirs

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", default="~/data/geo3k")
    parser.add_argument("--local_dir", default=None)
    parser.add_argument("--hdfs_dir", default=None)
    parser.add_argument("--local_dataset_path", default=None, help="The local path to the raw dataset, if it exists.")
    parser.add_argument(
        "--local_save_dir", default="~/data/geo3k", help="The save directory for the preprocessed dataset."
    )

    args = parser.parse_args()
    local_dataset_path = args.local_dataset_path

    data_source = "hiyouga/geometry3k"

    dataset = datasets.load_dataset(data_source)
    if local_dataset_path is not None:
        dataset = datasets.load_dataset(
            local_dataset_path,
        )
    else:
        dataset = datasets.load_dataset(
            data_source,
        )

    train_dataset = dataset["train"]
    test_dataset = dataset["test"]
@ -75,12 +87,16 @@ if __name__ == "__main__":
    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True, num_proc=8)
    test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True, num_proc=8)

    local_dir = args.local_dir
    hdfs_dir = args.hdfs_dir
    local_save_dir = args.local_dir
    if local_save_dir is not None:
        print("Warning: Argument 'local_dir' is deprecated. Please use 'local_save_dir' instead.")
    else:
        local_save_dir = args.local_save_dir

    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
    test_dataset.to_parquet(os.path.join(local_dir, "test.parquet"))
    train_dataset.to_parquet(os.path.join(local_save_dir, "train.parquet"))
    test_dataset.to_parquet(os.path.join(local_save_dir, "test.parquet"))

    if hdfs_dir is not None:
        makedirs(hdfs_dir)
        copy(src=local_dir, dst=hdfs_dir)
        copy(src=local_save_dir, dst=hdfs_dir)
@ -34,14 +34,22 @@ def extract_solution(solution_str):

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_dir", default="~/data/gsm8k_sft")
    parser.add_argument("--local_dir", default=None)
    parser.add_argument("--local_dataset_path", default=None, help="The local path to the raw dataset, if it exists.")
    parser.add_argument(
        "--local_save_dir", default="~/data/gsm8k_sft", help="The save directory for the preprocessed dataset."
    )
    parser.add_argument("--hdfs_dir", default=None)

    args = parser.parse_args()
    local_dataset_path = args.local_dataset_path

    data_source = "openai/gsm8k"

    dataset = datasets.load_dataset(data_source, "main")
    if local_dataset_path is not None:
        dataset = datasets.load_dataset(local_dataset_path, "main")
    else:
        dataset = datasets.load_dataset(data_source, "main")

    train_dataset = dataset["train"]
    test_dataset = dataset["test"]
@ -75,13 +83,20 @@ if __name__ == "__main__":
    train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
    test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True)

    local_dir = os.path.expanduser(args.local_dir)
    hdfs_dir = args.hdfs_dir

    train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
    test_dataset.to_parquet(os.path.join(local_dir, "test.parquet"))
    local_save_dir = args.local_dir
    if local_save_dir is not None:
        print("Warning: Argument 'local_dir' is deprecated. Please use 'local_save_dir' instead.")
    else:
        local_save_dir = args.local_save_dir

    local_save_dir = os.path.expanduser(local_save_dir)

    train_dataset.to_parquet(os.path.join(local_save_dir, "train.parquet"))
    test_dataset.to_parquet(os.path.join(local_save_dir, "test.parquet"))

    if hdfs_dir is not None:
        makedirs(hdfs_dir)

        copy(src=local_dir, dst=hdfs_dir)
        copy(src=local_save_dir, dst=hdfs_dir)
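The save-directory handling in the preprocessing scripts above stays backward compatible: `--local_dir` still works but is deprecated in favour of `--local_save_dir`. A hedged sketch of the two spellings, assuming the GSM8K script called by the workflows received the same treatment as the hunks above:

```bash
# Illustrative only: both invocations write train.parquet / test.parquet to the
# same directory; the first is preferred, the second prints a deprecation warning.
python3 examples/data_preprocess/gsm8k.py --local_save_dir "${HOME}/data/gsm8k"
python3 examples/data_preprocess/gsm8k.py --local_dir "${HOME}/data/gsm8k"
```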
@ -3,7 +3,7 @@ set -x
export CUDA_DEVICE_MAX_CONNECTIONS=1 # For megatron communication/computation overlapping

# 0. download the model
huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat
#huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat

# 1. convert the model to mcore format
# change the HF_MODEL_PATH and DIST_CKPT_PATH to your own path

@ -5,7 +5,7 @@ NUM_GPUS=${NUM_GPUS:-8}

MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

adv_estimator=grpo

@ -5,7 +5,7 @@ export no_proxy="localhost,127.0.0.1"
set -x

# Launch a vllm server
CUDA_VISIBLE_DEVICES=0 vllm serve verl-team/GenRM-CI-Test-1.5B \
CUDA_VISIBLE_DEVICES=0 vllm serve $HOME/models/verl-team/GenRM-CI-Test-1.5B \
    --served_model_name genrm-demo --host localhost --port 30000 > /dev/null &
SERVER_PID=$!

@ -51,7 +51,7 @@ CUDA_VISIBLE_DEVICES=4,5,6,7 python3 -m verl.trainer.main_ppo \
    data.max_response_length=2048 \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    actor_rollout_ref.model.path=Qwen/Qwen2.5-0.5B-Instruct \
    actor_rollout_ref.model.path=$HOME/models/Qwen/Qwen2.5-0.5B-Instruct \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=128 \
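The GenRM server launch and the PPO trainer arguments above switch from Hub repo ids to local snapshot paths; both vLLM and verl accept a filesystem path wherever a model name is expected, so nothing is resolved over the network once the snapshot exists. A side-by-side sketch of the two forms, using the same paths as the scripts above:

```bash
# Hub id (requires network access at startup):
# vllm serve verl-team/GenRM-CI-Test-1.5B --served_model_name genrm-demo

# Local snapshot (works with HF_HUB_OFFLINE=1, assuming it was staged beforehand):
vllm serve "$HOME/models/verl-team/GenRM-CI-Test-1.5B" --served_model_name genrm-demo
```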
@ -3,7 +3,7 @@

set -x

huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-VL-3B-Instruct
#huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-VL-3B-Instruct

ulimit -n 65535

@ -9,7 +9,7 @@ MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
if [ ! -d "$MODEL_PATH" ]; then
    echo "Downloading model to ${MODEL_PATH}..."
    huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
    # huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
else
    echo "Model directory ${MODEL_PATH} already exists, skip downloading."
fi
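The script above keeps its existence check but no longer performs the download itself; in the offline CI image `${MODEL_PATH}` is assumed to be populated already. When running the example outside that image, one option is to restore the download behind the same guard, for instance:

```bash
# Sketch for local (non-CI) runs: stage the model only if it is missing.
MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
if [ ! -d "$MODEL_PATH" ]; then
    huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
fi
```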
@ -3,7 +3,7 @@

set -x

huggingface-cli download Qwen/Qwen2.5-3B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-3B-Instruct
#huggingface-cli download Qwen/Qwen2.5-3B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-3B-Instruct

ulimit -n 65535

@ -11,7 +11,7 @@ ACTOR_STRATEGY=${ACTOR_STRATEGY:-"fsdp2"} # fsdp2 or megatron
# Download model if not exists
MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

# Algorithm parameters
adv_estimator=grpo

@ -9,7 +9,7 @@ NUM_GPUS=${NUM_GPUS:-8}

MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

USE_DUMMY_MODEL=${USE_DUMMY_MODEL:-False}
DUMMY_MODEL_PATH=${DUMMY_MODEL_PATH:-${HOME}/dummy_models/${MODEL_ID}}

@ -5,7 +5,7 @@ NUM_GPUS=${NUM_GPUS:-8}

MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

TRAIN_FILES=${TRAIN_FILES:-${HOME}/data/gsm8k/train.parquet}
VAL_FILES=${VAL_FILES:-${HOME}/data/gsm8k/test.parquet}

@ -1,8 +1,8 @@
#!/usr/bin/env bash
set -xeuo pipefail

huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
    --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
#huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
# --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B

python3 -m verl.trainer.main_generation \
    trainer.nnodes=1 \

@ -6,7 +6,7 @@ exp_name="Qwen2.5-0.5B-Instruct-spin-minimal"

MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

CUDA_VISIBLE_DEVICES=${VISIBLE_DEVICES} python3 -m recipe.spin.main_spin \
    data.train_files="${HOME}/data/gsm8k/train.parquet" \

@ -9,7 +9,7 @@ exp_name="Qwen2.5-0.5B-Instruct-sppo-minimal"

MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

python3 -m recipe.sppo.main_sppo \
    data.train_files="${HOME}/data/math/train.parquet" \

@ -6,7 +6,7 @@ CONFIG_NAME="$1"
ENGINE="${2:-vllm}"

# Download model if needed
huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir "$HOME/models/Qwen/Qwen2.5-0.5B"
#huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir "$HOME/models/Qwen/Qwen2.5-0.5B"

# Run the training with the specified configuration
python3 -m verl.trainer.main_ppo \

@ -7,7 +7,7 @@ NUM_GPUS=${NUM_GPUS:-8}

MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-0.5B-Instruct}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

TRAIN_FILES=${TRAIN_FILES:-$HOME/data/gsm8k/train.parquet}
VAL_FILES=${VAL_FILES:-$HOME/data/gsm8k/test.parquet}

@ -18,7 +18,7 @@ ckpts_home=${ckpts_home:-~/verl/test/gsm8k-sft-${backend}}

MODEL_ID=${MODEL_ID:-Qwen/Qwen3-0.6B}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"
#huggingface-cli download "${MODEL_ID}" --local-dir "${MODEL_PATH}"

SP_SIZE=${SP_SIZE:-1}
FSDP_SIZE=${FSDP_SIZE:-${NUM_GPUS}}