diff --git a/.github/ISSUE_TEMPLATE/900-release-checklist.yml b/.github/ISSUE_TEMPLATE/900-release-checklist.yml index c5a03f8c7..167a201e4 100644 --- a/.github/ISSUE_TEMPLATE/900-release-checklist.yml +++ b/.github/ISSUE_TEMPLATE/900-release-checklist.yml @@ -95,6 +95,6 @@ body: - [ ] Upload 310p wheel to Github release page - - [ ] Brodcast the release news (By message, blog , etc) + - [ ] Broadcast the release news (By message, blog , etc) - [ ] Close this issue diff --git a/.github/workflows/doc_codespell.yaml b/.github/workflows/doc_codespell.yaml deleted file mode 100644 index 156ad71e5..000000000 --- a/.github/workflows/doc_codespell.yaml +++ /dev/null @@ -1,33 +0,0 @@ - -name: 'doc-codespell' - -on: - pull_request: - branches: - - 'main' - - '*-dev' - paths: - - 'docs/**' - -jobs: - codespell: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10"] - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements-lint.txt - - name: Run codespell check - run: | - CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**') - CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever') - - codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}" diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 000000000..25b802a65 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,37 @@ +name: pre-commit + +on: + workflow_call: + +permissions: + contents: read + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v4 + - uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0 + with: + python-version: "3.10" + - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json" + - run: echo "::add-matcher::.github/workflows/matchers/mypy.json" + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v4 + with: + repository: vllm-project/vllm + path: ./vllm-empty + - name: Install vllm + working-directory: vllm-empty + run: | + pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu + VLLM_TARGET_DEVICE=empty pip install . + - name: Install vllm-ascend dev + run: | + pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu + - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + env: + SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint + with: + extra_args: --all-files --hook-stage manual diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml deleted file mode 100644 index 3ff01d098..000000000 --- a/.github/workflows/shellcheck.yml +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright 2023 The vLLM team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# Adapted from vllm-project/vllm/blob/main/.github -# - -name: Lint shell scripts -on: - pull_request: - branches: - - 'main' - - '*-dev' - paths: - - '**/*.sh' - - '.github/workflows/shellcheck.yml' - -env: - LC_ALL: en_US.UTF-8 - -defaults: - run: - shell: bash - -permissions: - contents: read - -jobs: - shellcheck: - runs-on: ubuntu-latest - steps: - - name: "Checkout" - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - fetch-depth: 0 - - - name: "Check shell scripts" - run: | - tools/shellcheck.sh diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index de2d430ea..ee6bb5f47 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -69,67 +69,7 @@ jobs: lint: # Only trigger lint on pull request if: ${{ github.event_name == 'pull_request' }} - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10"] - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements-lint.txt - - name: Run codespell check - run: | - CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**') - CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever') - - codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}" - - name: Analysing the code with ruff - run: | - echo "::add-matcher::.github/workflows/matchers/ruff.json" - ruff check --output-format github . - - name: Run isort - run: | - isort . --check-only - - name: Running yapf - run: | - python -m pip install --upgrade pip - pip install toml - pip install yapf==0.32.0 - yapf --diff --recursive . - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v4 - with: - repository: vllm-project/vllm - path: vllm-empty - - - name: Actionlint Check - env: - SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086 - run: | - echo "::add-matcher::.github/workflows/matchers/actionlint.json" - tools/actionlint.sh -color - - - name: Install vllm-project/vllm from source - working-directory: vllm-empty - run: | - pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu - VLLM_TARGET_DEVICE=empty pip install . 
- - - name: Install dependencies - run: | - pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu - - - name: Mypy Check - run: | - echo "::add-matcher::.github/workflows/matchers/mypy.json" - tools/mypy.sh 1 ${{ matrix.python-version }} + uses: ./.github/workflows/pre-commit.yml ut: needs: [lint] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..99df561d6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,141 @@ +default_install_hook_types: + - pre-commit + - commit-msg +default_stages: + - pre-commit # Run locally + - manual # Run in CI +exclude: 'examples/.*' # Exclude examples from all hooks by default +repos: +- repo: https://github.com/codespell-project/codespell + rev: v2.4.1 + hooks: + - id: codespell + args: [ + --toml, pyproject.toml, + '--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml', + '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn' + ] + additional_dependencies: + - tomli +- repo: https://github.com/google/yapf + rev: v0.43.0 + hooks: + - id: yapf + args: [--in-place, --verbose] + # Keep the same list from yapfignore here to avoid yapf failing without any inputs + exclude: '(.github|benchmarks|examples|docs)/.*' +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.7 + hooks: + - id: ruff + args: [--output-format, github, --fix] + - id: ruff-format + files: ^(benchmarks|examples)/.* +- repo: https://github.com/crate-ci/typos + rev: v1.32.0 + hooks: + - id: typos +- repo: https://github.com/PyCQA/isort + rev: 6.0.1 + hooks: + - id: isort +# - repo: https://github.com/pre-commit/mirrors-clang-format +# rev: v20.1.3 +# hooks: +# - id: clang-format +# files: ^csrc/.*\.(cpp|hpp|cc|hh|cxx|hxx)$ +# types_or: [c++] +# args: [--style=google, --verbose] +# - repo: https://github.com/jackdewinter/pymarkdown +# rev: v0.9.29 +# hooks: +# - id: pymarkdown +# args: [fix] +- repo: https://github.com/rhysd/actionlint + rev: v1.7.7 + hooks: + - id: actionlint +- repo: local + hooks: + # For local development, you can run mypy using tools/mypy.sh script if needed. 
+ # - id: mypy-local + # name: Run mypy for local Python installation + # entry: tools/mypy.sh 0 "local" + # language: system + # types: [python] + # stages: [pre-commit] # Don't run in CI + - id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward + name: Run mypy for Python 3.9 + entry: tools/mypy.sh 1 "3.9" + # Use system python because vllm installation is required + language: system + types: [python] + stages: [manual] # Only run in CI + - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward + name: Run mypy for Python 3.10 + entry: tools/mypy.sh 1 "3.10" + # Use system python because vllm installation is required + language: system + types: [python] + stages: [manual] # Only run in CI + - id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward + name: Run mypy for Python 3.11 + entry: tools/mypy.sh 1 "3.11" + # Use system python because vllm installation is required + language: system + types: [python] + stages: [manual] # Only run in CI + - id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward + name: Run mypy for Python 3.12 + entry: tools/mypy.sh 1 "3.12" + # Use system python because vllm installation is required + language: system + types: [python] + stages: [manual] # Only run in CI + # FIXME: enable shellcheck + # - id: shellcheck + # name: Lint shell scripts + # entry: tools/shellcheck.sh + # language: script + # types: [shell] + - id: png-lint + name: Lint PNG exports from excalidraw + entry: tools/png-lint.sh + language: script + types: [png] + - id: signoff-commit + name: Sign-off Commit + entry: bash + args: + - -c + - | + if ! grep -q "^Signed-off-by: $(git config user.name) <$(git config user.email)>" "$(git rev-parse --git-path COMMIT_EDITMSG)"; then + printf "\nSigned-off-by: $(git config user.name) <$(git config user.email)>\n" >> "$(git rev-parse --git-path COMMIT_EDITMSG)" + fi + language: system + verbose: true + stages: [commit-msg] + - id: check-filenames + name: Check for spaces in all filenames + entry: bash + args: + - -c + - 'git ls-files | grep " " && echo "Filenames should not contain spaces!" 
&& exit 1 || exit 0' + language: system + always_run: true + pass_filenames: false + - id: enforce-import-regex-instead-of-re + name: Enforce import regex as re + entry: python tools/enforce_regex_import.py + language: python + types: [python] + pass_filenames: false + additional_dependencies: [regex] + # Keep `suggestion` last + - id: suggestion + name: Suggestion + entry: bash -c 'echo "To bypass pre-commit hooks, add --no-verify to git commit."' + language: system + verbose: true + pass_filenames: false + # Insert new entries above the `suggestion` entry diff --git a/benchmarks/ops/ben_vocabparallelembedding.py b/benchmarks/ops/ben_vocabparallelembedding.py index e91cfed7b..b3ef7ec50 100644 --- a/benchmarks/ops/ben_vocabparallelembedding.py +++ b/benchmarks/ops/ben_vocabparallelembedding.py @@ -12,12 +12,12 @@ import vllm_ascend.platform # noqa: F401 def benchmark_npu(fn, num_iterations=100, num_warmup_iterations=50): """ Benchmark function for NPU operations - + Args: fn: Function to benchmark num_iterations: Number of timing iterations num_warmup_iterations: Number of warmup iterations - + Returns: float: Minimum elapsed time in seconds """ @@ -41,19 +41,26 @@ def benchmark_npu(fn, num_iterations=100, num_warmup_iterations=50): def get_masked_input_and_mask_ref( - input_: torch.Tensor, org_vocab_start_index: int, - org_vocab_end_index: int, num_org_vocab_padding: int, - added_vocab_start_index: int, - added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]: + input_: torch.Tensor, + org_vocab_start_index: int, + org_vocab_end_index: int, + num_org_vocab_padding: int, + added_vocab_start_index: int, + added_vocab_end_index: int, +) -> Tuple[torch.Tensor, torch.Tensor]: """Reference implementation for verification""" - org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < - org_vocab_end_index) + org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index) added_vocab_mask = (input_ >= added_vocab_start_index) & ( - input_ < added_vocab_end_index) - added_offset = added_vocab_start_index - ( - org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding - valid_offset = (org_vocab_start_index * - org_vocab_mask) + (added_offset * added_vocab_mask) + input_ < added_vocab_end_index + ) + added_offset = ( + added_vocab_start_index + - (org_vocab_end_index - org_vocab_start_index) + - num_org_vocab_padding + ) + valid_offset = (org_vocab_start_index * org_vocab_mask) + ( + added_offset * added_vocab_mask + ) vocab_mask = org_vocab_mask | added_vocab_mask masked_input = vocab_mask * (input_ - valid_offset) return masked_input, ~vocab_mask @@ -94,21 +101,25 @@ def test_get_masked_input_and_mask( # Define reference function def ref_fn(): - return get_masked_input_and_mask_ref(input_tensor, - test_case["org_start"], - test_case["org_end"], - test_case["padding"], - test_case["added_start"], - test_case["added_end"]) + return get_masked_input_and_mask_ref( + input_tensor, + test_case["org_start"], + test_case["org_end"], + test_case["padding"], + test_case["added_start"], + test_case["added_end"], + ) # Define custom function def custom_fn(): - return torch.ops._C.get_masked_input_and_mask(input_tensor, - test_case["org_start"], - test_case["org_end"], - test_case["padding"], - test_case["added_start"], - test_case["added_end"]) + return torch.ops._C.get_masked_input_and_mask( + input_tensor, + test_case["org_start"], + test_case["org_end"], + test_case["padding"], + test_case["added_start"], + test_case["added_end"], + ) # Get results for 
correctness testing ref_masked_input, ref_mask = ref_fn() @@ -120,9 +131,9 @@ def test_get_masked_input_and_mask( # Print performance results print("\nPerformance Results:") - print(f"Reference implementation: {ref_time*1000:.3f} ms") - print(f"Custom implementation: {custom_time*1000:.3f} ms") - print(f"Speedup: {ref_time/custom_time:.2f}x") + print(f"Reference implementation: {ref_time * 1000:.3f} ms") + print(f"Custom implementation: {custom_time * 1000:.3f} ms") + print(f"Speedup: {ref_time / custom_time:.2f}x") # Compare results for correctness ref_masked_input = ref_masked_input.to(dtype) @@ -136,9 +147,12 @@ def test_get_masked_input_and_mask( ref_masked_input, rtol=1e-5, atol=1e-5, - msg=f"Masked input mismatch for case: {test_case}") - torch.testing.assert_close(custom_mask, - ref_mask, - rtol=1e-5, - atol=1e-5, - msg=f"Mask mismatch for case: {test_case}") + msg=f"Masked input mismatch for case: {test_case}", + ) + torch.testing.assert_close( + custom_mask, + ref_mask, + rtol=1e-5, + atol=1e-5, + msg=f"Mask mismatch for case: {test_case}", + ) diff --git a/benchmarks/scripts/convert_json_to_markdown.py b/benchmarks/scripts/convert_json_to_markdown.py index 7a1c5d996..112043426 100644 --- a/benchmarks/scripts/convert_json_to_markdown.py +++ b/benchmarks/scripts/convert_json_to_markdown.py @@ -49,36 +49,43 @@ def read_markdown(file): def results_to_json(latency, throughput, serving): - return json.dumps({ - 'latency': latency.to_dict(), - 'throughput': throughput.to_dict(), - 'serving': serving.to_dict() - }) + return json.dumps( + { + "latency": latency.to_dict(), + "throughput": throughput.to_dict(), + "serving": serving.to_dict(), + } + ) if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Process the results of the benchmark tests.") + description="Process the results of the benchmark tests." + ) parser.add_argument( "--results_folder", type=str, default="../results/", - help="The folder where the benchmark results are stored.") + help="The folder where the benchmark results are stored.", + ) parser.add_argument( "--output_folder", type=str, default="../results/", - help="The folder where the benchmark results are stored.") - parser.add_argument("--markdown_template", - type=str, - default="./perf_result_template.md", - help="The template file for the markdown report.") - parser.add_argument("--tag", - default="main", - help="Tag to be used for release message.") - parser.add_argument("--commit_id", - default="", - help="Commit ID to be used for release message.") + help="The folder where the benchmark results are stored.", + ) + parser.add_argument( + "--markdown_template", + type=str, + default="./perf_result_template.md", + help="The template file for the markdown report.", + ) + parser.add_argument( + "--tag", default="main", help="Tag to be used for release message." + ) + parser.add_argument( + "--commit_id", default="", help="Commit ID to be used for release message." 
+ ) args = parser.parse_args() results_folder = (CUR_PATH / args.results_folder).resolve() @@ -87,7 +94,6 @@ if __name__ == "__main__": # collect results for test_file in results_folder.glob("*.json"): - with open(test_file) as f: raw_result = json.loads(f.read()) @@ -111,7 +117,8 @@ if __name__ == "__main__": for perc in [10, 25, 50, 75, 90, 99]: # Multiply 1000 to convert the time unit from s to ms raw_result.update( - {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}) + {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]} + ) raw_result["avg_latency"] = raw_result["avg_latency"] * 1000 # add the result to raw_result @@ -129,55 +136,53 @@ if __name__ == "__main__": continue print(f"Skipping {test_file}") - serving_results.sort(key=lambda x: (len(x['test_name']), x['test_name'])) + serving_results.sort(key=lambda x: (len(x["test_name"]), x["test_name"])) latency_results = pd.DataFrame.from_dict(latency_results) serving_results = pd.DataFrame.from_dict(serving_results) throughput_results = pd.DataFrame.from_dict(throughput_results) - raw_results_json = results_to_json(latency_results, throughput_results, - serving_results) + raw_results_json = results_to_json( + latency_results, throughput_results, serving_results + ) # remapping the key, for visualization purpose if not latency_results.empty: - latency_results = latency_results[list( - latency_column_mapping.keys())].rename( - columns=latency_column_mapping) + latency_results = latency_results[list(latency_column_mapping.keys())].rename( + columns=latency_column_mapping + ) if not serving_results.empty: - serving_results = serving_results[list( - serving_column_mapping.keys())].rename( - columns=serving_column_mapping) + serving_results = serving_results[list(serving_column_mapping.keys())].rename( + columns=serving_column_mapping + ) if not throughput_results.empty: - throughput_results = throughput_results[list( - throughput_results_column_mapping.keys())].rename( - columns=throughput_results_column_mapping) + throughput_results = throughput_results[ + list(throughput_results_column_mapping.keys()) + ].rename(columns=throughput_results_column_mapping) - processed_results_json = results_to_json(latency_results, - throughput_results, - serving_results) + processed_results_json = results_to_json( + latency_results, throughput_results, serving_results + ) # get markdown tables - latency_md_table = tabulate(latency_results, - headers='keys', - tablefmt='pipe', - showindex=False) - serving_md_table = tabulate(serving_results, - headers='keys', - tablefmt='pipe', - showindex=False) - throughput_md_table = tabulate(throughput_results, - headers='keys', - tablefmt='pipe', - showindex=False) + latency_md_table = tabulate( + latency_results, headers="keys", tablefmt="pipe", showindex=False + ) + serving_md_table = tabulate( + serving_results, headers="keys", tablefmt="pipe", showindex=False + ) + throughput_md_table = tabulate( + throughput_results, headers="keys", tablefmt="pipe", showindex=False + ) # document the result print(output_folder) with open(output_folder / "benchmark_results.md", "w") as f: - results = read_markdown(markdown_template) results = results.format( latency_tests_markdown_table=latency_md_table, throughput_tests_markdown_table=throughput_md_table, serving_tests_markdown_table=serving_md_table, - benchmarking_results_in_json_string=processed_results_json) + benchmarking_results_in_json_string=processed_results_json, + ) f.write(results) diff --git a/benchmarks/scripts/patch_benchmark_dataset.py 
b/benchmarks/scripts/patch_benchmark_dataset.py
index 5c8a6662d..61e588bac 100644
--- a/benchmarks/scripts/patch_benchmark_dataset.py
+++ b/benchmarks/scripts/patch_benchmark_dataset.py
@@ -7,9 +7,8 @@ import libcst.matchers as m
 
 
 # Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
-# TDOO(Potabk): Remove this patch when the issue is fixed in the upstream
+# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
 class StreamingFalseTransformer(cst.CSTTransformer):
-
     def __init__(self):
         self.in_target_class = False
         self.in_target_func = False
@@ -63,15 +62,15 @@ def patch_file(path):
     print(f"Patched: {abs_path}")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = ArgumentParser(
-        description=
-        "Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
+        description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
     )
     parser.add_argument(
         "--path",
         type=str,
         default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
-        help="Path to the benchmark_dataset.py file")
+        help="Path to the benchmark_dataset.py file",
+    )
     args = parser.parse_args()
     patch_file(args.path)
diff --git a/benchmarks/scripts/run_accuracy.py b/benchmarks/scripts/run_accuracy.py
index 2922c52c6..e7cd5c6f9 100644
--- a/benchmarks/scripts/run_accuracy.py
+++ b/benchmarks/scripts/run_accuracy.py
@@ -44,82 +44,72 @@ BATCH_SIZE = {"ceval-valid": 1, "mmlu": 1, "gsm8k": "auto", "mmmu_val": 1}
 MODEL_TYPE = {
     "Qwen/Qwen3-8B-Base": "vllm",
     "Qwen/Qwen3-30B-A3B": "vllm",
-    "Qwen/Qwen2.5-VL-7B-Instruct": "vllm-vlm"
+    "Qwen/Qwen2.5-VL-7B-Instruct": "vllm-vlm",
 }
 
 # Command templates for running evaluations
 MODEL_RUN_INFO = {
-    "Qwen/Qwen3-30B-A3B":
-    ("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n"
-     "lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
-     "--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
-     ),
-    "Qwen/Qwen3-8B-Base":
-    ("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
-     "lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
-     "--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
-     ),
-    "Qwen/Qwen2.5-VL-7B-Instruct":
-    ("export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
-     "lm_eval --model vllm-vlm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
-     "--apply_chat_template --fewshot_as_multiturn --batch_size 1"),
+    "Qwen/Qwen3-30B-A3B": (
+        "export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n"
+        "lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
+        "--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
+    ),
+    "Qwen/Qwen3-8B-Base": (
+        "export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
+        "lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
+        "--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
+    ),
+    "Qwen/Qwen2.5-VL-7B-Instruct": (
+        "export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
+        "lm_eval --model vllm-vlm
--model_args $MODEL_ARGS --tasks {datasets} \ \n" + "--apply_chat_template --fewshot_as_multiturn --batch_size 1" + ), } # Evaluation metric filters per task FILTER = { "gsm8k": "exact_match,flexible-extract", "ceval-valid": "acc,none", - "mmmu_val": "acc,none" + "mmmu_val": "acc,none", } # Expected accuracy values for models EXPECTED_VALUE = { - "Qwen/Qwen3-30B-A3B": { - "ceval-valid": 0.83, - "gsm8k": 0.85 - }, - "Qwen/Qwen3-8B-Base": { - "ceval-valid": 0.82, - "gsm8k": 0.83 - }, - "Qwen/Qwen2.5-VL-7B-Instruct": { - "mmmu_val": 0.51 - } + "Qwen/Qwen3-30B-A3B": {"ceval-valid": 0.83, "gsm8k": 0.85}, + "Qwen/Qwen3-8B-Base": {"ceval-valid": 0.82, "gsm8k": 0.83}, + "Qwen/Qwen2.5-VL-7B-Instruct": {"mmmu_val": 0.51}, } PARALLEL_MODE = { "Qwen/Qwen3-8B-Base": "TP", "Qwen/Qwen2.5-VL-7B-Instruct": "TP", - "Qwen/Qwen3-30B-A3B": "EP" + "Qwen/Qwen3-30B-A3B": "EP", } # Execution backend configuration EXECUTION_MODE = { "Qwen/Qwen3-8B-Base": "ACLGraph", "Qwen/Qwen2.5-VL-7B-Instruct": "ACLGraph", - "Qwen/Qwen3-30B-A3B": "ACLGraph" + "Qwen/Qwen3-30B-A3B": "ACLGraph", } # Model arguments for evaluation MODEL_ARGS = { - "Qwen/Qwen3-8B-Base": - "pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6", - "Qwen/Qwen2.5-VL-7B-Instruct": - "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2", - "Qwen/Qwen3-30B-A3B": - "pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True" + "Qwen/Qwen3-8B-Base": "pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6", + "Qwen/Qwen2.5-VL-7B-Instruct": "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2", + "Qwen/Qwen3-30B-A3B": "pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True", } # Whether to apply chat template formatting APPLY_CHAT_TEMPLATE = { "Qwen/Qwen3-8B-Base": True, "Qwen/Qwen2.5-VL-7B-Instruct": True, - "Qwen/Qwen3-30B-A3B": False + "Qwen/Qwen3-30B-A3B": False, } # Few-shot examples handling as multi-turn dialogues. 
 FEWSHOT_AS_MULTITURN = {
     "Qwen/Qwen3-8B-Base": True,
     "Qwen/Qwen2.5-VL-7B-Instruct": True,
-    "Qwen/Qwen3-30B-A3B": False
+    "Qwen/Qwen3-30B-A3B": False,
 }
 
 # Relative tolerance for accuracy checks
@@ -136,7 +126,7 @@ def run_accuracy_test(queue, model, dataset):
         "tasks": dataset,
         "apply_chat_template": APPLY_CHAT_TEMPLATE[model],
         "fewshot_as_multiturn": FEWSHOT_AS_MULTITURN[model],
-        "batch_size": BATCH_SIZE[dataset]
+        "batch_size": BATCH_SIZE[dataset],
     }
 
     if MODEL_TYPE[model] == "vllm":
@@ -151,7 +141,7 @@ def run_accuracy_test(queue, model, dataset):
         queue.put(e)
         sys.exit(1)
     finally:
-        if 'results' in locals():
+        if "results" in locals():
             del results
         gc.collect()
         torch.npu.empty_cache()
@@ -161,16 +151,15 @@
 def generate_md(model_name, tasks_list, args, datasets):
     """Generate Markdown report with evaluation results"""
     # Format the run command
-    run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name,
-                                                datasets=datasets)
+    run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name, datasets=datasets)
 
     model = model_name.split("/")[1]
 
     # Version information section
     version_info = (
         f"**vLLM Version**: vLLM: {args.vllm_version} "
-        f"([{args.vllm_commit}]({VLLM_URL+args.vllm_commit})), "
+        f"([{args.vllm_commit}]({VLLM_URL + args.vllm_commit})), "
         f"vLLM Ascend: {args.vllm_ascend_version} "
-        f"([{args.vllm_ascend_commit}]({VLLM_ASCEND_URL+args.vllm_ascend_commit})) "
+        f"([{args.vllm_ascend_commit}]({VLLM_ASCEND_URL + args.vllm_ascend_commit})) "
     )
 
     # Report header with system info
@@ -218,21 +207,39 @@ def generate_md(model_name, tasks_list, args, datasets):
             else:
                 n_shot = "0"
             flag = ACCURACY_FLAG.get(task_name, "")
-            row = (f"| {task_name:<37} "
-                   f"| {flt:<6} "
-                   f"| {n_shot:6} "
-                   f"| {metric:<6} "
-                   f"| {flag}{value:>5.4f} "
-                   f"| ± {stderr:>5.4f} |")
+            row = (
+                f"| {task_name:<37} "
+                f"| {flt:<6} "
+                f"| {n_shot:6} "
+                f"| {metric:<6} "
+                f"| {flag}{value:>5.4f} "
+                f"| ± {stderr:>5.4f} |"
+            )
             if not task_name.startswith("-"):
                 rows.append(row)
-                rows_sub.append("<details>" + "\n" + "<summary>" + task_name +
-                                " details" + "</summary>" + "\n" * 2 + header)
+                rows_sub.append(
+                    "<details>"
+                    + "\n"
+                    + "<summary>"
+                    + task_name
+                    + " details"
+                    + "</summary>"
+                    + "\n" * 2
+                    + header
+                )
             rows_sub.append(row)
         rows_sub.append("</details>")
 
     # Combine all Markdown sections
-    md = preamble + "\n" + header + "\n" + "\n".join(rows) + "\n" + "\n".join(
-        rows_sub) + "\n"
+    md = (
+        preamble
+        + "\n"
+        + header
+        + "\n"
+        + "\n".join(rows)
+        + "\n"
+        + "\n".join(rows_sub)
+        + "\n"
+    )
     print(md)
     return md
@@ -262,8 +269,9 @@ def main(args):
     # Evaluate model on each dataset
     for dataset in datasets:
         accuracy_expected = EXPECTED_VALUE[args.model][dataset]
-        p = multiprocessing.Process(target=run_accuracy_test,
-                                    args=(result_queue, args.model, dataset))
+        p = multiprocessing.Process(
+            target=run_accuracy_test, args=(result_queue, args.model, dataset)
+        )
         p.start()
         p.join()
         if p.is_alive():
@@ -274,8 +282,11 @@
         time.sleep(10)
         result = result_queue.get()
         print(result)
-        if accuracy_expected - RTOL < result[dataset][
-                FILTER[dataset]] < accuracy_expected + RTOL:
+        if (
+            accuracy_expected - RTOL
+            < result[dataset][FILTER[dataset]]
+            < accuracy_expected + RTOL
+        ):
             ACCURACY_FLAG[dataset] = "✅"
         else:
             ACCURACY_FLAG[dataset] = "❌"
@@ -285,10 +296,11 @@
 
 if __name__ == "__main__":
-    multiprocessing.set_start_method('spawn', force=True)
+    multiprocessing.set_start_method("spawn", force=True)
     # Initialize argument parser
     parser = argparse.ArgumentParser(
-        description="Run model accuracy evaluation and generate report")
+        description="Run model accuracy evaluation and generate report"
+    )
     parser.add_argument("--output", type=str, required=True)
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--vllm_ascend_version", type=str, required=False)
diff --git a/docs/source/developer_guide/contribution/index.md b/docs/source/developer_guide/contribution/index.md
index 8f61ae1f1..795f11afd 100644
--- a/docs/source/developer_guide/contribution/index.md
+++ b/docs/source/developer_guide/contribution/index.md
@@ -12,38 +12,55 @@ Theoretically, the vllm-ascend build is only supported on Linux because
 
 But you can still set up dev env on Linux/Windows/macOS for linting and basic test as following commands:
 
+#### Run lint locally
 ```bash
 # Choose a base dir (~/vllm-project/) and set up venv
 cd ~/vllm-project/
 python3 -m venv .venv
 source ./.venv/bin/activate
 
-# Clone vllm code and install
-git clone https://github.com/vllm-project/vllm.git
+# Clone vllm-ascend and install
+git clone https://github.com/vllm-project/vllm-ascend.git
+cd vllm-ascend
+
+# Install lint requirements and enable the pre-commit hook
+pip install -r requirements-lint.txt
+
+# Run lint (you need to install the pre-commit deps via a proxy network the first time)
+bash format.sh
+```
+
+#### Run CI locally
+
+After completing the "Run lint" setup, you can run CI locally:
+
+```{code-block} bash
+   :substitutions:
+
+cd ~/vllm-project/
+
+# Running CI requires vLLM to be installed
+git clone --branch |vllm_version| https://github.com/vllm-project/vllm.git
 cd vllm
 pip install -r requirements/build.txt
 VLLM_TARGET_DEVICE="empty" pip install .
 cd ..
 
-# Clone vllm-ascend and install
-git clone https://github.com/vllm-project/vllm-ascend.git
+# Install requirements
 cd vllm-ascend
-# install system requirement
-apt install -y gcc g++ cmake libnuma-dev
-# install project requirement
+# For Linux:
 pip install -r requirements-dev.txt
+# For non-Linux:
+cat requirements-dev.txt | grep -Ev '^#|^--|^$|^-r' | while read PACKAGE; do pip install "$PACKAGE"; done
+cat requirements.txt | grep -Ev '^#|^--|^$|^-r' | while read PACKAGE; do pip install "$PACKAGE"; done
 
-# Then you can run lint and mypy test
-bash format.sh
+# Run CI:
+bash format.sh ci
+```
 
-# Build:
-# - only supported on Linux (torch_npu available)
-# pip install -e .
-# - build without deps for debugging in other OS
-# pip install -e . --no-deps
-# - build without custom ops
-# COMPILE_CUSTOM_KERNELS=0 pip install -e .
+#### Submit the commit
+
+```bash
 # Commit changed files using `-s`
 git commit -sm "your commit info"
 ```
diff --git a/docs/source/user_guide/additional_config.md b/docs/source/user_guide/additional_config.md
index e1b13bfcc..d58ac5ac8 100644
--- a/docs/source/user_guide/additional_config.md
+++ b/docs/source/user_guide/additional_config.md
@@ -1,6 +1,6 @@
 # Additional Configuration
 
-addintional configuration is a mechanism provided by vLLM to allow plugins to control inner behavior by their own. vLLM Ascend uses this mechanism to make the project more flexible.
+Additional configuration is a mechanism provided by vLLM that allows plugins to control inner behavior on their own. vLLM Ascend uses this mechanism to make the project more flexible.
 
 ## How to use
 
diff --git a/format.sh b/format.sh
index 32569e2c7..d08315378 100755
--- a/format.sh
+++ b/format.sh
@@ -19,325 +19,26 @@
 # Adapted from https://github.com/vllm-project/vllm/tree/main/tools
 #
-# YAPF formatter, adapted from ray and skypilot.
-#
-# Usage:
-#    # Do work and commit your work.
-
-#    # Format files that differ from origin/main.
-#    bash format.sh
-
-#    # Commit changed files with message 'Run yapf and ruff'
-#
-#
-# YAPF + Clang formatter (if installed). This script formats all changed files from the last mergebase.
-# You are encouraged to run this locally before pushing changes for review.
-
-# Cause the script to exit if a single command fails
-set -eo pipefail
-
-# this stops git rev-parse from failing if we run this from the .git directory
-builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
-ROOT="$(git rev-parse --show-toplevel)"
-builtin cd "$ROOT" || exit 1
-
 check_command() {
     if !
command -v "$1" &> /dev/null; then - echo "❓❓$1 is not installed, please run \`pip install -r requirements-lint.txt\`" + echo "❓❓$1 is not installed, please run:" + echo "# Install lint deps" + echo "pip install -r requirements-lint.txt" + echo "# (optional) Enable git commit pre check" + echo "pre-commit install" + echo "" + echo "See step by step contribution guide:" + echo "https://vllm-ascend.readthedocs.io/en/latest/developer_guide/contribution" exit 1 fi } -check_command yapf -check_command ruff -check_command mypy -check_command codespell -check_command isort -check_command clang-format +check_command pre-commit -YAPF_VERSION=$(yapf --version | awk '{print $2}') -RUFF_VERSION=$(ruff --version | awk '{print $2}') -MYPY_VERSION=$(mypy --version | awk '{print $2}') -CODESPELL_VERSION=$(codespell --version) -ISORT_VERSION=$(isort --vn) -CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}') -SPHINX_LINT_VERSION=$(sphinx-lint --version | awk '{print $2}') - -# params: tool name, tool version, required version -tool_version_check() { - expected=$(grep "$1" requirements-lint.txt | cut -d'=' -f3) - if [[ "$2" != "$expected" ]]; then - echo "❓❓Wrong $1 version installed: $expected is required, not $2." - exit 1 - fi -} - -tool_version_check "yapf" "$YAPF_VERSION" -tool_version_check "ruff" "$RUFF_VERSION" -tool_version_check "mypy" "$MYPY_VERSION" -tool_version_check "isort" "$ISORT_VERSION" -tool_version_check "codespell" "$CODESPELL_VERSION" -tool_version_check "clang-format" "$CLANGFORMAT_VERSION" -tool_version_check "sphinx-lint" "$SPHINX_LINT_VERSION" - -YAPF_FLAGS=( - '--recursive' - '--parallel' -) - -YAPF_EXCLUDES=( - '--exclude' 'build/**' -) - -# Format specified files -format() { - yapf --in-place "${YAPF_FLAGS[@]}" "$@" -} - -# Format files that differ from main branch. Ignores dirs that are not slated -# for autoformat yet. -format_changed() { - # The `if` guard ensures that the list of filenames is not empty, which - # could cause yapf to receive 0 positional arguments, making it hang - # waiting for STDIN. - # - # `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that - # exist on both branches. - MERGEBASE="$(git merge-base origin/main HEAD)" - - if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then - git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs -P 5 \ - yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}" - fi -} - -# Format all files -format_all() { - yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" . -} - -echo 'vllm-ascend yapf:' -## This flag formats individual files. --files *must* be the first command line -## arg to use this option. -if [[ "$1" == '--files' ]]; then - format "${@:2}" - # If `--all` is passed, then any further arguments are ignored and the - # entire python directory is formatted. -elif [[ "$1" == '--all' ]]; then - format_all +# TODO: cleanup SC exclude +export SHELLCHECK_OPTS="--exclude=SC2046,SC2006,SC2086" +if [[ "$1" != 'ci' ]]; then + pre-commit run --all-files else - # Format only the files that changed in last commit. 
- format_changed + pre-commit run --all-files --hook-stage manual fi -echo 'vllm-ascend yapf: Done' - -# Run mypy -echo 'vllm-ascend mypy:' -tools/mypy.sh -echo 'vllm-ascend mypy: Done' - - -# If git diff returns a file that is in the skip list, the file may be checked anyway: -# https://github.com/codespell-project/codespell/issues/1915 -# Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem -CODESPELL_EXCLUDES=( - '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**' -) - -CODESPELL_IGNORE_WORDS=( - '-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever' -) - -# check spelling of specified files -spell_check() { - codespell "$@" "${CODESPELL_IGNORE_WORDS[@]}" -} - -spell_check_all() { - codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}" -} - -# Spelling check of files that differ from main branch. -spell_check_changed() { - # The `if` guard ensures that the list of filenames is not empty, which - # could cause ruff to receive 0 positional arguments, making it hang - # waiting for STDIN. - # - # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that - # exist on both branches. - MERGEBASE="$(git merge-base origin/main HEAD)" - if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then - git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ - codespell "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}" - codespell "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}" - fi -} - -echo 'vllm-ascend codespell:' -# Run Codespell -## This flag runs spell check of individual files. --files *must* be the first command line -## arg to use this option. -if [[ "$1" == '--files' ]]; then - spell_check "${@:2}" - # If `--all` is passed, then any further arguments are ignored and the - # entire python directory is linted. -elif [[ "$1" == '--all' ]]; then - spell_check_all -else - # Check spelling only of the files that changed in last commit. - spell_check_changed -fi -echo 'vllm-ascend codespell: Done' - - -# Lint specified files -lint() { - ruff check "$@" -} - -# Lint files that differ from main branch. Ignores dirs that are not slated -# for autolint yet. -lint_changed() { - # The `if` guard ensures that the list of filenames is not empty, which - # could cause ruff to receive 0 positional arguments, making it hang - # waiting for STDIN. - # - # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that - # exist on both branches. - MERGEBASE="$(git merge-base origin/main HEAD)" - - if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then - git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ - ruff check - fi - -} - -echo 'vllm-ascend ruff:' -# Run Ruff -### This flag lints individual files. --files *must* be the first command line -### arg to use this option. -if [[ "$1" == '--files' ]]; then - lint "${@:2}" - # If `--all` is passed, then any further arguments are ignored and the - # entire python directory is linted. -elif [[ "$1" == '--all' ]]; then - lint vllm tests -else - # Format only the files that changed in last commit. - lint_changed -fi -echo 'vllm-ascend ruff: Done' - -# check spelling of specified files -isort_check() { - isort "$@" -} - -isort_check_all(){ - isort . -} - -# Spelling check of files that differ from main branch. 
-isort_check_changed() { - # The `if` guard ensures that the list of filenames is not empty, which - # could cause ruff to receive 0 positional arguments, making it hang - # waiting for STDIN. - # - # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that - # exist on both branches. - MERGEBASE="$(git merge-base origin/main HEAD)" - - if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then - git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ - isort - fi -} - -echo 'vllm-ascend isort:' -# Run Isort -# This flag runs spell check of individual files. --files *must* be the first command line -# arg to use this option. -if [[ "$1" == '--files' ]]; then - isort_check "${@:2}" - # If `--all` is passed, then any further arguments are ignored and the - # entire python directory is linted. -elif [[ "$1" == '--all' ]]; then - isort_check_all -else - # Check spelling only of the files that changed in last commit. - isort_check_changed -fi -echo 'vllm-ascend isort: Done' - -# Clang-format section -# Exclude some files for formatting because they are vendored -CLANG_FORMAT_EXCLUDES=( - 'csrc/kernels/utils.h' 'csrc/kernels/pos_encoding_kernels.cpp' 'csrc/kernels/advance_step.cpp' 'csrc/kernels/get_masked_input_and_mask_kernel.cpp' 'csrc/torch_binding.cpp' 'csrc/ops.h' -) - -# Format specified files with clang-format -clang_format() { - clang-format -i "$@" -} - -# Format files that differ from main branch with clang-format. -clang_format_changed() { - # The `if` guard ensures that the list of filenames is not empty, which - # could cause clang-format to receive 0 positional arguments, making it hang - # waiting for STDIN. - # - # `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that - # exist on both branches. - MERGEBASE="$(git merge-base origin/main HEAD)" - - # Get the list of changed files, excluding the specified ones - changed_files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.h' '*.cpp' '*.cu' '*.cuh' | (grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") || echo -e)) - if [ -n "$changed_files" ]; then - echo "$changed_files" | xargs -P 5 clang-format -i - fi -} - -# Format all files with clang-format -clang_format_all() { - find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \ - | grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") \ - | xargs clang-format -i -} - -# Run clang-format -if [[ "$1" == '--files' ]]; then - clang_format "${@:2}" -elif [[ "$1" == '--all' ]]; then - clang_format_all -else - clang_format_changed -fi -echo 'vllm-ascend clang-format: Done' - -echo 'vllm-ascend actionlint:' -tools/actionlint.sh -color -echo 'vllm-ascend actionlint: Done' - -echo 'vllm-ascend shellcheck:' -tools/shellcheck.sh -echo 'vllm-ascend shellcheck: Done' - -echo 'excalidraw png check:' -tools/png-lint.sh -echo 'excalidraw png check: Done' - -if ! git diff --quiet &>/dev/null; then - echo - echo "🔍🔍There are files changed by the format checker or by you that are not added and committed:" - git --no-pager diff --name-only - echo "🔍🔍Format checker passed, but please add, commit and push all the files above to include changes made by the format checker." - - exit 1 -else - echo "✨🎉 Format check passed! Congratulations! 
🎉✨" -fi - -# echo 'vLLM sphinx-lint:' -# tools/sphinx-lint.sh -# echo 'vLLM sphinx-lint: Done' diff --git a/requirements-lint.txt b/requirements-lint.txt index 1789d184f..eab3838f0 100644 --- a/requirements-lint.txt +++ b/requirements-lint.txt @@ -1,15 +1,8 @@ # formatting -yapf==0.32.0 -toml==0.10.2 -tomli==2.0.2 -ruff==0.6.5 -codespell==2.3.0 -isort==5.13.2 -clang-format==18.1.5 -sphinx-lint==1.0.0 +pre-commit==4.0.1 # type checking -mypy==1.15.0 +mypy==1.11.1 types-PyYAML types-requests types-setuptools diff --git a/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py b/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py index 2aa3c0778..d328b4bdd 100644 --- a/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py +++ b/tests/e2e/singlecard/core/ascend_scheduler/test_ascend_scheduler.py @@ -324,8 +324,10 @@ def test_stop_via_update_from_output(): model_output = ModelRunnerOutput( req_ids=[req.request_id for req in requests], - req_id_to_index={req.request_id: i - for i, req in enumerate(requests)}, + req_id_to_index={ + req.request_id: i + for i, req in enumerate(requests) + }, sampled_token_ids=[[EOS_TOKEN_ID], [10, 11]], # First request hits EOS, second continues @@ -374,8 +376,10 @@ def test_stop_via_update_from_output(): model_output = ModelRunnerOutput( req_ids=[req.request_id for req in requests], - req_id_to_index={req.request_id: i - for i, req in enumerate(requests)}, + req_id_to_index={ + req.request_id: i + for i, req in enumerate(requests) + }, sampled_token_ids=[[10, 42, 12], [13, 14]], # First request hits stop token spec_token_ids=None, @@ -422,8 +426,10 @@ def test_stop_via_update_from_output(): model_output = ModelRunnerOutput( req_ids=[req.request_id for req in requests], - req_id_to_index={req.request_id: i - for i, req in enumerate(requests)}, + req_id_to_index={ + req.request_id: i + for i, req in enumerate(requests) + }, sampled_token_ids=[[10, 11, 12], [13]], # First request exceeds max_tokens spec_token_ids=None, diff --git a/tests/e2e/singlecard/ops/test_vocabparallelembedding.py b/tests/e2e/singlecard/ops/test_vocabparallelembedding.py index 97d6c7059..c53308088 100644 --- a/tests/e2e/singlecard/ops/test_vocabparallelembedding.py +++ b/tests/e2e/singlecard/ops/test_vocabparallelembedding.py @@ -21,8 +21,8 @@ def get_masked_input_and_mask_ref( added_vocab_start_index: int, added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]: """Reference implementation for verification""" - org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < - org_vocab_end_index) + org_vocab_mask = (input_ >= org_vocab_start_index) & ( + input_ < org_vocab_end_index) added_vocab_mask = (input_ >= added_vocab_start_index) & ( input_ < added_vocab_end_index) added_offset = added_vocab_start_index - ( diff --git a/tests/e2e/singlecard/sample/test_rejection_sampler.py b/tests/e2e/singlecard/sample/test_rejection_sampler.py index 1b92aca19..123e7c20c 100644 --- a/tests/e2e/singlecard/sample/test_rejection_sampler.py +++ b/tests/e2e/singlecard/sample/test_rejection_sampler.py @@ -394,8 +394,8 @@ def test_rejection_sampling_approximates_target_distribution(): distance_wrt_reference) expected_improvement_multiplier = 20 - assert (relative_change_in_distance_wrt_target > - relative_change_in_distance_wrt_reference * + assert (relative_change_in_distance_wrt_target + > relative_change_in_distance_wrt_reference * expected_improvement_multiplier) diff --git a/tests/e2e/singlecard/test_scheduler.py b/tests/e2e/singlecard/test_scheduler.py 
index c4c9b7f55..bcf56a7a2 100644 --- a/tests/e2e/singlecard/test_scheduler.py +++ b/tests/e2e/singlecard/test_scheduler.py @@ -231,8 +231,10 @@ def test_stop_via_update_from_output(): model_output = ModelRunnerOutput( req_ids=[req.request_id for req in requests], - req_id_to_index={req.request_id: i - for i, req in enumerate(requests)}, + req_id_to_index={ + req.request_id: i + for i, req in enumerate(requests) + }, sampled_token_ids=[[EOS_TOKEN_ID], [10, 11]], # First request hits EOS, second continues @@ -279,8 +281,10 @@ def test_stop_via_update_from_output(): model_output = ModelRunnerOutput( req_ids=[req.request_id for req in requests], - req_id_to_index={req.request_id: i - for i, req in enumerate(requests)}, + req_id_to_index={ + req.request_id: i + for i, req in enumerate(requests) + }, sampled_token_ids=[[10, 42, 12], [13, 14]], # First request hits stop token spec_token_ids=None, @@ -325,8 +329,10 @@ def test_stop_via_update_from_output(): model_output = ModelRunnerOutput( req_ids=[req.request_id for req in requests], - req_id_to_index={req.request_id: i - for i, req in enumerate(requests)}, + req_id_to_index={ + req.request_id: i + for i, req in enumerate(requests) + }, sampled_token_ids=[[10, 11, 12], [13]], # First request exceeds max_tokens spec_token_ids=None, diff --git a/tools/enforce_regex_import.py b/tools/enforce_regex_import.py new file mode 100644 index 000000000..92e6f79e3 --- /dev/null +++ b/tools/enforce_regex_import.py @@ -0,0 +1,104 @@ +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# Copyright 2023 The vLLM team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. 
+# Adapted from https://github.com/vllm-project/vllm/tree/main/tools +# + +from __future__ import annotations + +import subprocess +from pathlib import Path + +import regex as re + +FORBIDDEN_PATTERNS = re.compile( + r'^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)') +ALLOWED_PATTERNS = [ + re.compile(r'^\s*import\s+regex\s+as\s+re\s*$'), + re.compile(r'^\s*import\s+regex\s*$'), +] + + +def get_staged_python_files() -> list[str]: + try: + result = subprocess.run( + ['git', 'diff', '--cached', '--name-only', '--diff-filter=AM'], + capture_output=True, + text=True, + check=True) + files = result.stdout.strip().split( + '\n') if result.stdout.strip() else [] + return [f for f in files if f.endswith('.py')] + except subprocess.CalledProcessError: + return [] + + +def is_forbidden_import(line: str) -> bool: + line = line.strip() + return bool( + FORBIDDEN_PATTERNS.match(line) + and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS)) + + +def check_file(filepath: str) -> list[tuple[int, str]]: + violations = [] + try: + with open(filepath, encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + if is_forbidden_import(line): + violations.append((line_num, line.strip())) + except (OSError, UnicodeDecodeError): + pass + return violations + + +def main() -> int: + files = get_staged_python_files() + if not files: + return 0 + + total_violations = 0 + + for filepath in files: + if not Path(filepath).exists(): + continue + + if filepath == "setup.py": + continue + + violations = check_file(filepath) + if violations: + print(f"\n❌ {filepath}:") + for line_num, line in violations: + print(f" Line {line_num}: {line}") + total_violations += 1 + + if total_violations > 0: + print(f"\n💡 Found {total_violations} violation(s).") + print("❌ Please replace 'import re' with 'import regex as re'") + print( + " Also replace 'from re import ...' with 'from regex import ...'" + ) # noqa: E501 + print("✅ Allowed imports:") + print(" - import regex as re") + print(" - import regex") # noqa: E501 + return 1 + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/mypy.sh b/tools/mypy.sh index 57a3d270f..bf9bc774d 100755 --- a/tools/mypy.sh +++ b/tools/mypy.sh @@ -20,12 +20,16 @@ # CI=${1:-0} -PYTHON_VERSION=${2:-3.9} +PYTHON_VERSION=${2:-local} if [ "$CI" -eq 1 ]; then set -e fi +if [ $PYTHON_VERSION == "local" ]; then + PYTHON_VERSION=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') +fi + run_mypy() { echo "Running mypy on $1" mypy --check-untyped-defs --follow-imports skip --python-version "${PYTHON_VERSION}" "$@" diff --git a/tools/shellcheck.sh b/tools/shellcheck.sh index a10e17356..e0833f395 100755 --- a/tools/shellcheck.sh +++ b/tools/shellcheck.sh @@ -39,3 +39,7 @@ if ! [ -x "$(command -v shellcheck)" ]; then PATH="$PATH:$(pwd)/shellcheck-${scversion}" export PATH fi + +# should enable this +# find . 
-path ./.git -prune -o -name "*.sh" -print0 \ +# | xargs -0 -I {} sh -c 'git check-ignore -q "{}" || shellcheck -s bash "{}"' diff --git a/typos.toml b/typos.toml new file mode 100644 index 000000000..bd75b50aa --- /dev/null +++ b/typos.toml @@ -0,0 +1,177 @@ +[files] +# these files may be written in non english words +extend-exclude = [] +ignore-hidden = true +ignore-files = true +ignore-dot = true +ignore-vcs = true +ignore-global = true +ignore-parent = true + +[default] +binary = false +check-filename = false +check-file = true +unicode = true +ignore-hex = true +identifier-leading-digits = false +locale = "en" +extend-ignore-identifiers-re = [".*Unc.*", ".*_thw", + ".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*", + ".*ot.*", ".*[Tt]h[rR].*"] +extend-ignore-words-re = ["CANN", "cann"] +extend-ignore-re = [] + +[default.extend-identifiers] +nd_to_nz_2d = "nd_to_nz_2d" +bbc5b7ede = "bbc5b7ede" +womens_doubles = "womens_doubles" +v_2nd = "v_2nd" +splitted_input = "splitted_input" +NOOPs = "NOOPs" +typ = "typ" +nin_shortcut = "nin_shortcut" +UperNetDecoder = "UperNetDecoder" +subtile = "subtile" +SFOuput = "SFOuput" +# huggingface transformers repo uses these words +depthwise_seperable_out_channel = "depthwise_seperable_out_channel" +DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d" +depthwise_seperable_CNN = "depthwise_seperable_CNN" + +[default.extend-words] +iy = "iy" +tendencias = "tendencias" +# intel cpu features +tme = "tme" +dout = "dout" +Pn = "Pn" +arange = "arange" + +[type.py] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.py.extend-identifiers] +arange = "arange" +NDArray = "NDArray" +EOFError = "EOFError" + +[type.py.extend-words] + +[type.cpp] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.cpp.extend-identifiers] +countr_one = "countr_one" + +[type.cpp.extend-words] + +[type.rust] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.rust.extend-identifiers] +flate2 = "flate2" + +[type.rust.extend-words] +ser = "ser" + +[type.lock] +extend-glob = [] +check-file = false +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.lock.extend-identifiers] + +[type.lock.extend-words] + +[type.jl] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.jl.extend-identifiers] + +[type.jl.extend-words] +modul = "modul" +egals = "egals" +usig = "usig" +egal = "egal" + +[type.go] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.go.extend-identifiers] +flate = "flate" + +[type.go.extend-words] + +[type.css] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.css.extend-identifiers] +nd = "nd" + +[type.css.extend-words] + +[type.man] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.man.extend-identifiers] +Nd = "Nd" + +[type.man.extend-words] + +[type.cert] +extend-glob = [] +check-file = false +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.cert.extend-identifiers] + +[type.cert.extend-words] + +[type.sh] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.sh.extend-identifiers] +stap 
= "stap" +ot = "ot" + +[type.sh.extend-words] + +[type.vimscript] +extend-glob = [] +extend-ignore-identifiers-re = [] +extend-ignore-words-re = [] +extend-ignore-re = [] + +[type.vimscript.extend-identifiers] +windo = "windo" + +[type.vimscript.extend-words] diff --git a/vllm_ascend/core/scheduler.py b/vllm_ascend/core/scheduler.py index 00a17ddfc..ecd7bece6 100644 --- a/vllm_ascend/core/scheduler.py +++ b/vllm_ascend/core/scheduler.py @@ -232,7 +232,7 @@ class AscendScheduler(Scheduler): token_budget -= num_new_tokens request.status = RequestStatus.RUNNING request.num_computed_tokens = num_computed_tokens - # Count the number of prifix cached tokens. + # Count the number of prefix cached tokens. if request.num_cached_tokens < 0: request.num_cached_tokens = num_computed_tokens diff --git a/vllm_ascend/distributed/kv_transfer/simple_connector.py b/vllm_ascend/distributed/kv_transfer/simple_connector.py index 7b05052d0..31b38c068 100644 --- a/vllm_ascend/distributed/kv_transfer/simple_connector.py +++ b/vllm_ascend/distributed/kv_transfer/simple_connector.py @@ -199,8 +199,11 @@ class SimpleConnector(KVConnectorBase): model_executable: torch.nn.Module, model_input: "ModelInputForGPUWithSamplingMetadata", kv_caches: List[torch.Tensor], - ) -> Tuple[Union[torch.Tensor, IntermediateTensors], bool, - "ModelInputForGPUWithSamplingMetadata", ]: + ) -> Tuple[ + Union[torch.Tensor, IntermediateTensors], + bool, + "ModelInputForGPUWithSamplingMetadata", + ]: bypass_model_exec = True model_config = self.model_config diff --git a/vllm_ascend/models/deepseek_mtp.py b/vllm_ascend/models/deepseek_mtp.py index 979a6099f..3cbc62e80 100644 --- a/vllm_ascend/models/deepseek_mtp.py +++ b/vllm_ascend/models/deepseek_mtp.py @@ -108,7 +108,8 @@ class CustomDeepSeekMultiTokenPredictor(DeepSeekMultiTokenPredictor): self.num_mtp_layers = config.num_nextn_predict_layers # to map the exact layer index from weights self.layers = torch.nn.ModuleDict({ - str(idx): CustomDeepSeekMultiTokenPredictorLayer( + str(idx): + CustomDeepSeekMultiTokenPredictorLayer( config, f"{prefix}.layers.{idx}", model_config=vllm_config.model_config, diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py index 34d665a1c..ff777a143 100644 --- a/vllm_ascend/ops/fused_moe.py +++ b/vllm_ascend/ops/fused_moe.py @@ -79,8 +79,9 @@ def process_topk_ids(topk_ids: torch.Tensor, expert_num: int, ep_size: int, experts_per_ep_rank_val).to(original_dtype) indices_arange = torch.arange(topk_ids.shape[0], device=device) - is_new_segment = torch.cat((torch.tensor([True], device=device), - assigned_ep_rank[1:] != assigned_ep_rank[:-1])) + is_new_segment = torch.cat( + (torch.tensor([True], device=device), assigned_ep_rank[1:] + != assigned_ep_rank[:-1])) temp_start_markers = torch.full_like(indices_arange, -1, dtype=indices_arange.dtype) @@ -469,13 +470,13 @@ def fused_experts_with_all2all_buffer( expert_idx_buffer_scatter.shape, dtype=expert_idx_buffer_scatter.dtype, device=expert_idx_buffer_scatter.device) - non_pad_len = torch.sum( - (expert_idx_buffer_scatter != global_num_experts).to(torch.int32)) - hidden_states_pad_idx[ - expert_idx_buffer_scatter != global_num_experts] = torch.arange( - non_pad_len, - dtype=expert_idx_buffer_scatter.dtype, - device=hidden_states.device) + non_pad_len = torch.sum((expert_idx_buffer_scatter + != global_num_experts).to(torch.int32)) + hidden_states_pad_idx[expert_idx_buffer_scatter != + global_num_experts] = torch.arange( + non_pad_len, + dtype=expert_idx_buffer_scatter.dtype, + 
device=hidden_states.device) hidden_states_buffer_scatter = hidden_states[hidden_states_pad_idx] expert_idx_buffer_gather = torch.empty_like( @@ -528,8 +529,8 @@ def fused_experts_with_all2all_buffer( dist.all_to_all_single(hidden_states_gatter, hidden_states_scatter, group=ep_group.device_group) - hidden_states_gatter = hidden_states_gatter[ - expert_idx_buffer_scatter != global_num_experts] + hidden_states_gatter = hidden_states_gatter[expert_idx_buffer_scatter != + global_num_experts] if hidden_states_gatter.shape[0] != row_idx_len: hidden_states = torch.zeros((row_idx_len, hidden_states.shape[1]), dtype=hidden_states.dtype, diff --git a/vllm_ascend/ops/vocab_parallel_embedding.py b/vllm_ascend/ops/vocab_parallel_embedding.py index b326f0ce5..13889e01c 100644 --- a/vllm_ascend/ops/vocab_parallel_embedding.py +++ b/vllm_ascend/ops/vocab_parallel_embedding.py @@ -30,8 +30,8 @@ def get_masked_input_and_mask( added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]: # torch.compile will fuse all of the pointwise ops below # into a single kernel, making it very fast - org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < - org_vocab_end_index) + org_vocab_mask = (input_ >= org_vocab_start_index) & ( + input_ < org_vocab_end_index) added_vocab_mask = (input_ >= added_vocab_start_index) & ( input_ < added_vocab_end_index) added_offset = added_vocab_start_index - ( diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index c77fce0db..47e827498 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -880,8 +880,8 @@ class NPUModelRunner(LoRAModelRunnerMixin): assert total_num_scheduled_tokens > 0 num_reqs = self.input_batch.num_reqs assert num_reqs > 0 - if (self.use_aclgraph and - total_num_scheduled_tokens <= self.aclgraph_batch_sizes[-1]): + if (self.use_aclgraph and total_num_scheduled_tokens + <= self.aclgraph_batch_sizes[-1]): # Add padding to the batch size. num_input_tokens = self.vllm_config.pad_for_cudagraph( total_num_scheduled_tokens)
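
Reviewer note: taken together, these changes collapse the per-tool lint jobs (yapf, ruff, isort, codespell, mypy, actionlint, shellcheck) into a single pre-commit entry point. A minimal sketch of the resulting local workflow, assuming only the commands already present in the new format.sh, requirements-lint.txt, and the contribution docs above:

```bash
# One-time setup: requirements-lint.txt now installs pre-commit (plus mypy and type stubs)
pip install -r requirements-lint.txt
pre-commit install              # registers the pre-commit and commit-msg hooks

# What `bash format.sh` now runs under the hood
pre-commit run --all-files

# What `bash format.sh ci` and the pre-commit.yml workflow run,
# additionally including the manual-stage hooks such as mypy
pre-commit run --all-files --hook-stage manual
```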