[1/N][CI] Move linting system to pre-commit hooks (#1256)

### What this PR does / why we need it?

Follow the vllm-project/vllm linting approach:
https://github.com/vllm-project/vllm/blob/main/.pre-commit-config.yaml

Enable pre-commit to avoid low-level errors as much as possible.

This PR is one step of #1241. The purpose is to make the linting system clearer and more convenient. This step mainly enables the following hooks:
yapf, actionlint, ruff, typos, isort, mypy, png-lint, signoff-commit,
enforce-import-regex-instead-of-re.
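
For reference, once the hooks are installed, each one can also be run individually by id (a usage sketch; ids follow `.pre-commit-config.yaml`):

```bash
# Run every configured hook against the whole tree
pre-commit run --all-files
# Or run a single hook by id, e.g. ruff or typos
pre-commit run ruff --all-files
pre-commit run typos --all-files
```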

TODO:
- clang-format (checks csrc with Google style): needs code cleanup, disabled for now; a manual invocation is sketched after this list
- pymarkdown: needs code cleanup, disabled for now
- shellcheck: needs code cleanup, disabled for now
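
Until clang-format is re-enabled, a one-off run along these lines can format csrc (a hypothetical invocation mirroring the disabled hook's `--style=google` argument):

```bash
# Hypothetical manual clang-format pass over csrc, matching the
# disabled hook's Google style; the file globs are an assumption
find csrc \( -name '*.cpp' -o -name '*.hpp' -o -name '*.cc' -o -name '*.hh' \) -print0 \
  | xargs -0 clang-format --style=google -i
```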

### Does this PR introduce _any_ user-facing change?

Only a developer UX change:

https://vllm-ascend--1256.org.readthedocs.build/en/1256/developer_guide/contributing.html#run-lint-locally

```
pip install -r requirements-lint.txt && pre-commit install
bash format.sh
```
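
After `pre-commit install`, the hooks run on every commit; a usage sketch:

```bash
# Hooks run automatically on commit; -s adds the Signed-off-by
# line that the signoff-commit hook checks for
git commit -sm "your commit info"
# Bypass all hooks when necessary
git commit -sm "your commit info" --no-verify
```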

### How was this patch tested?

CI passed with newly added and existing tests.

Co-authored-by: Yikun <yikunkero@gmail.com>
Co-authored-by: wangli <wangli858794774@gmail.com>
- vLLM version: v0.9.1
- vLLM main: 5358cce5ff

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
Author: Li Wang
Date: 2025-07-10 14:17:15 +08:00 (committed by GitHub)
Parent: 643e6f5486
Commit: c7446438a9
28 changed files with 753 additions and 667 deletions


@ -95,6 +95,6 @@ body:
- [ ] Upload 310p wheel to Github release page
- [ ] Brodcast the release news (By message, blog , etc)
- [ ] Broadcast the release news (By message, blog , etc)
- [ ] Close this issue


@ -1,33 +0,0 @@
name: 'doc-codespell'
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- 'docs/**'
jobs:
codespell:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements-lint.txt
- name: Run codespell check
run: |
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever')
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"

.github/workflows/pre-commit.yml (new file)

@ -0,0 +1,37 @@
name: pre-commit
on:
workflow_call:
permissions:
contents: read
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- name: Checkout vllm-project/vllm-ascend repo
uses: actions/checkout@v4
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
with:
python-version: "3.10"
- run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
- run: echo "::add-matcher::.github/workflows/matchers/mypy.json"
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: ./vllm-empty
- name: Install vllm
working-directory: vllm-empty
run: |
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=empty pip install .
- name: Install vllm-ascend dev
run: |
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
env:
SHELLCHECK_OPTS: "--exclude=SC2046,SC2006,SC2086" # Exclude SC2046, SC2006, SC2086 for actionlint
with:
extra_args: --all-files --hook-stage manual


@ -1,49 +0,0 @@
#
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Adapted from vllm-project/vllm/blob/main/.github
#
name: Lint shell scripts
on:
pull_request:
branches:
- 'main'
- '*-dev'
paths:
- '**/*.sh'
- '.github/workflows/shellcheck.yml'
env:
LC_ALL: en_US.UTF-8
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
shellcheck:
runs-on: ubuntu-latest
steps:
- name: "Checkout"
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: "Check shell scripts"
run: |
tools/shellcheck.sh


@ -69,67 +69,7 @@ jobs:
lint:
# Only trigger lint on pull request
if: ${{ github.event_name == 'pull_request' }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements-lint.txt
- name: Run codespell check
run: |
CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever')
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
- name: Analysing the code with ruff
run: |
echo "::add-matcher::.github/workflows/matchers/ruff.json"
ruff check --output-format github .
- name: Run isort
run: |
isort . --check-only
- name: Running yapf
run: |
python -m pip install --upgrade pip
pip install toml
pip install yapf==0.32.0
yapf --diff --recursive .
- name: Checkout vllm-project/vllm repo
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
path: vllm-empty
- name: Actionlint Check
env:
SHELLCHECK_OPTS: --exclude=SC2046,SC2006,SC2086
run: |
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
tools/actionlint.sh -color
- name: Install vllm-project/vllm from source
working-directory: vllm-empty
run: |
pip install -r requirements/build.txt --extra-index-url https://download.pytorch.org/whl/cpu
VLLM_TARGET_DEVICE=empty pip install .
- name: Install dependencies
run: |
pip install -r requirements-dev.txt --extra-index-url https://download.pytorch.org/whl/cpu
- name: Mypy Check
run: |
echo "::add-matcher::.github/workflows/matchers/mypy.json"
tools/mypy.sh 1 ${{ matrix.python-version }}
uses: ./.github/workflows/pre-commit.yml
ut:
needs: [lint]

.pre-commit-config.yaml (new file)

@ -0,0 +1,141 @@
default_install_hook_types:
- pre-commit
- commit-msg
default_stages:
- pre-commit # Run locally
- manual # Run in CI
exclude: 'examples/.*' # Exclude examples from all hooks by default
repos:
- repo: https://github.com/codespell-project/codespell
rev: v2.4.1
hooks:
- id: codespell
args: [
--toml, pyproject.toml,
'--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn'
]
additional_dependencies:
- tomli
- repo: https://github.com/google/yapf
rev: v0.43.0
hooks:
- id: yapf
args: [--in-place, --verbose]
# Keep the same list from yapfignore here to avoid yapf failing without any inputs
exclude: '(.github|benchmarks|examples|docs)/.*'
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.7
hooks:
- id: ruff
args: [--output-format, github, --fix]
- id: ruff-format
files: ^(benchmarks|examples)/.*
- repo: https://github.com/crate-ci/typos
rev: v1.32.0
hooks:
- id: typos
- repo: https://github.com/PyCQA/isort
rev: 6.0.1
hooks:
- id: isort
# - repo: https://github.com/pre-commit/mirrors-clang-format
# rev: v20.1.3
# hooks:
# - id: clang-format
# files: ^csrc/.*\.(cpp|hpp|cc|hh|cxx|hxx)$
# types_or: [c++]
# args: [--style=google, --verbose]
# - repo: https://github.com/jackdewinter/pymarkdown
# rev: v0.9.29
# hooks:
# - id: pymarkdown
# args: [fix]
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
hooks:
- id: actionlint
- repo: local
hooks:
# For local development, you can run mypy using tools/mypy.sh script if needed.
# - id: mypy-local
# name: Run mypy for local Python installation
# entry: tools/mypy.sh 0 "local"
# language: system
# types: [python]
# stages: [pre-commit] # Don't run in CI
- id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.9
entry: tools/mypy.sh 1 "3.9"
# Use system python because vllm installation is required
language: system
types: [python]
stages: [manual] # Only run in CI
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.10
entry: tools/mypy.sh 1 "3.10"
# Use system python because vllm installation is required
language: system
types: [python]
stages: [manual] # Only run in CI
- id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.11
entry: tools/mypy.sh 1 "3.11"
# Use system python because vllm installation is required
language: system
types: [python]
stages: [manual] # Only run in CI
- id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.12
entry: tools/mypy.sh 1 "3.12"
# Use system python because vllm installation is required
language: system
types: [python]
stages: [manual] # Only run in CI
# FIXME: enable shellcheck
# - id: shellcheck
# name: Lint shell scripts
# entry: tools/shellcheck.sh
# language: script
# types: [shell]
- id: png-lint
name: Lint PNG exports from excalidraw
entry: tools/png-lint.sh
language: script
types: [png]
- id: signoff-commit
name: Sign-off Commit
entry: bash
args:
- -c
- |
if ! grep -q "^Signed-off-by: $(git config user.name) <$(git config user.email)>" "$(git rev-parse --git-path COMMIT_EDITMSG)"; then
printf "\nSigned-off-by: $(git config user.name) <$(git config user.email)>\n" >> "$(git rev-parse --git-path COMMIT_EDITMSG)"
fi
language: system
verbose: true
stages: [commit-msg]
- id: check-filenames
name: Check for spaces in all filenames
entry: bash
args:
- -c
- 'git ls-files | grep " " && echo "Filenames should not contain spaces!" && exit 1 || exit 0'
language: system
always_run: true
pass_filenames: false
- id: enforce-import-regex-instead-of-re
name: Enforce import regex as re
entry: python tools/enforce_regex_import.py
language: python
types: [python]
pass_filenames: false
additional_dependencies: [regex]
# Keep `suggestion` last
- id: suggestion
name: Suggestion
entry: bash -c 'echo "To bypass pre-commit hooks, add --no-verify to git commit."'
language: system
verbose: true
pass_filenames: false
# Insert new entries above the `suggestion` entry
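
As a usage note derived from the stages above: local commits run the `pre-commit` stage, while CI also runs the `manual` stage to pick up the mypy hooks. A sketch of the two invocations:

```bash
# What `pre-commit install` wires into git commit (pre-commit stage)
pre-commit run --all-files
# What CI runs, including manual-stage hooks such as mypy-3.x
pre-commit run --all-files --hook-stage manual
```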


@ -12,12 +12,12 @@ import vllm_ascend.platform # noqa: F401
def benchmark_npu(fn, num_iterations=100, num_warmup_iterations=50):
"""
Benchmark function for NPU operations
Args:
fn: Function to benchmark
num_iterations: Number of timing iterations
num_warmup_iterations: Number of warmup iterations
Returns:
float: Minimum elapsed time in seconds
"""
@ -41,19 +41,26 @@ def benchmark_npu(fn, num_iterations=100, num_warmup_iterations=50):
def get_masked_input_and_mask_ref(
input_: torch.Tensor, org_vocab_start_index: int,
org_vocab_end_index: int, num_org_vocab_padding: int,
added_vocab_start_index: int,
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
input_: torch.Tensor,
org_vocab_start_index: int,
org_vocab_end_index: int,
num_org_vocab_padding: int,
added_vocab_start_index: int,
added_vocab_end_index: int,
) -> Tuple[torch.Tensor, torch.Tensor]:
"""Reference implementation for verification"""
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ <
org_vocab_end_index)
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ < org_vocab_end_index)
added_vocab_mask = (input_ >= added_vocab_start_index) & (
input_ < added_vocab_end_index)
added_offset = added_vocab_start_index - (
org_vocab_end_index - org_vocab_start_index) - num_org_vocab_padding
valid_offset = (org_vocab_start_index *
org_vocab_mask) + (added_offset * added_vocab_mask)
input_ < added_vocab_end_index
)
added_offset = (
added_vocab_start_index
- (org_vocab_end_index - org_vocab_start_index)
- num_org_vocab_padding
)
valid_offset = (org_vocab_start_index * org_vocab_mask) + (
added_offset * added_vocab_mask
)
vocab_mask = org_vocab_mask | added_vocab_mask
masked_input = vocab_mask * (input_ - valid_offset)
return masked_input, ~vocab_mask
@ -94,21 +101,25 @@ def test_get_masked_input_and_mask(
# Define reference function
def ref_fn():
return get_masked_input_and_mask_ref(input_tensor,
test_case["org_start"],
test_case["org_end"],
test_case["padding"],
test_case["added_start"],
test_case["added_end"])
return get_masked_input_and_mask_ref(
input_tensor,
test_case["org_start"],
test_case["org_end"],
test_case["padding"],
test_case["added_start"],
test_case["added_end"],
)
# Define custom function
def custom_fn():
return torch.ops._C.get_masked_input_and_mask(input_tensor,
test_case["org_start"],
test_case["org_end"],
test_case["padding"],
test_case["added_start"],
test_case["added_end"])
return torch.ops._C.get_masked_input_and_mask(
input_tensor,
test_case["org_start"],
test_case["org_end"],
test_case["padding"],
test_case["added_start"],
test_case["added_end"],
)
# Get results for correctness testing
ref_masked_input, ref_mask = ref_fn()
@ -120,9 +131,9 @@ def test_get_masked_input_and_mask(
# Print performance results
print("\nPerformance Results:")
print(f"Reference implementation: {ref_time*1000:.3f} ms")
print(f"Custom implementation: {custom_time*1000:.3f} ms")
print(f"Speedup: {ref_time/custom_time:.2f}x")
print(f"Reference implementation: {ref_time * 1000:.3f} ms")
print(f"Custom implementation: {custom_time * 1000:.3f} ms")
print(f"Speedup: {ref_time / custom_time:.2f}x")
# Compare results for correctness
ref_masked_input = ref_masked_input.to(dtype)
@ -136,9 +147,12 @@ def test_get_masked_input_and_mask(
ref_masked_input,
rtol=1e-5,
atol=1e-5,
msg=f"Masked input mismatch for case: {test_case}")
torch.testing.assert_close(custom_mask,
ref_mask,
rtol=1e-5,
atol=1e-5,
msg=f"Mask mismatch for case: {test_case}")
msg=f"Masked input mismatch for case: {test_case}",
)
torch.testing.assert_close(
custom_mask,
ref_mask,
rtol=1e-5,
atol=1e-5,
msg=f"Mask mismatch for case: {test_case}",
)


@ -49,36 +49,43 @@ def read_markdown(file):
def results_to_json(latency, throughput, serving):
return json.dumps({
'latency': latency.to_dict(),
'throughput': throughput.to_dict(),
'serving': serving.to_dict()
})
return json.dumps(
{
"latency": latency.to_dict(),
"throughput": throughput.to_dict(),
"serving": serving.to_dict(),
}
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Process the results of the benchmark tests.")
description="Process the results of the benchmark tests."
)
parser.add_argument(
"--results_folder",
type=str,
default="../results/",
help="The folder where the benchmark results are stored.")
help="The folder where the benchmark results are stored.",
)
parser.add_argument(
"--output_folder",
type=str,
default="../results/",
help="The folder where the benchmark results are stored.")
parser.add_argument("--markdown_template",
type=str,
default="./perf_result_template.md",
help="The template file for the markdown report.")
parser.add_argument("--tag",
default="main",
help="Tag to be used for release message.")
parser.add_argument("--commit_id",
default="",
help="Commit ID to be used for release message.")
help="The folder where the benchmark results are stored.",
)
parser.add_argument(
"--markdown_template",
type=str,
default="./perf_result_template.md",
help="The template file for the markdown report.",
)
parser.add_argument(
"--tag", default="main", help="Tag to be used for release message."
)
parser.add_argument(
"--commit_id", default="", help="Commit ID to be used for release message."
)
args = parser.parse_args()
results_folder = (CUR_PATH / args.results_folder).resolve()
@ -87,7 +94,6 @@ if __name__ == "__main__":
# collect results
for test_file in results_folder.glob("*.json"):
with open(test_file) as f:
raw_result = json.loads(f.read())
@ -111,7 +117,8 @@ if __name__ == "__main__":
for perc in [10, 25, 50, 75, 90, 99]:
# Multiply 1000 to convert the time unit from s to ms
raw_result.update(
{f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
{f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}
)
raw_result["avg_latency"] = raw_result["avg_latency"] * 1000
# add the result to raw_result
@ -129,55 +136,53 @@ if __name__ == "__main__":
continue
print(f"Skipping {test_file}")
serving_results.sort(key=lambda x: (len(x['test_name']), x['test_name']))
serving_results.sort(key=lambda x: (len(x["test_name"]), x["test_name"]))
latency_results = pd.DataFrame.from_dict(latency_results)
serving_results = pd.DataFrame.from_dict(serving_results)
throughput_results = pd.DataFrame.from_dict(throughput_results)
raw_results_json = results_to_json(latency_results, throughput_results,
serving_results)
raw_results_json = results_to_json(
latency_results, throughput_results, serving_results
)
# remapping the key, for visualization purpose
if not latency_results.empty:
latency_results = latency_results[list(
latency_column_mapping.keys())].rename(
columns=latency_column_mapping)
latency_results = latency_results[list(latency_column_mapping.keys())].rename(
columns=latency_column_mapping
)
if not serving_results.empty:
serving_results = serving_results[list(
serving_column_mapping.keys())].rename(
columns=serving_column_mapping)
serving_results = serving_results[list(serving_column_mapping.keys())].rename(
columns=serving_column_mapping
)
if not throughput_results.empty:
throughput_results = throughput_results[list(
throughput_results_column_mapping.keys())].rename(
columns=throughput_results_column_mapping)
throughput_results = throughput_results[
list(throughput_results_column_mapping.keys())
].rename(columns=throughput_results_column_mapping)
processed_results_json = results_to_json(latency_results,
throughput_results,
serving_results)
processed_results_json = results_to_json(
latency_results, throughput_results, serving_results
)
# get markdown tables
latency_md_table = tabulate(latency_results,
headers='keys',
tablefmt='pipe',
showindex=False)
serving_md_table = tabulate(serving_results,
headers='keys',
tablefmt='pipe',
showindex=False)
throughput_md_table = tabulate(throughput_results,
headers='keys',
tablefmt='pipe',
showindex=False)
latency_md_table = tabulate(
latency_results, headers="keys", tablefmt="pipe", showindex=False
)
serving_md_table = tabulate(
serving_results, headers="keys", tablefmt="pipe", showindex=False
)
throughput_md_table = tabulate(
throughput_results, headers="keys", tablefmt="pipe", showindex=False
)
# document the result
print(output_folder)
with open(output_folder / "benchmark_results.md", "w") as f:
results = read_markdown(markdown_template)
results = results.format(
latency_tests_markdown_table=latency_md_table,
throughput_tests_markdown_table=throughput_md_table,
serving_tests_markdown_table=serving_md_table,
benchmarking_results_in_json_string=processed_results_json)
benchmarking_results_in_json_string=processed_results_json,
)
f.write(results)


@ -7,9 +7,8 @@ import libcst.matchers as m
# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
# TDOO(Potabk): Remove this patch when the issue is fixed in the upstream
# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
class StreamingFalseTransformer(cst.CSTTransformer):
def __init__(self):
self.in_target_class = False
self.in_target_func = False
@ -63,15 +62,18 @@ def patch_file(path):
print(f"Patched: {abs_path}")
if __name__ == '__main__':
if __name__ == "__main__":
parser = ArgumentParser(
description=
"Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
)
parser.add_argument(
"--path", type=str, help="Path to the benchmark_dataset.py file"
)
parser.add_argument(
"--path",
type=str,
default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
help="Path to the benchmark_dataset.py file")
help="Path to the benchmark_dataset.py file",
)
args = parser.parse_args()
patch_file(args.path)


@ -44,82 +44,72 @@ BATCH_SIZE = {"ceval-valid": 1, "mmlu": 1, "gsm8k": "auto", "mmmu_val": 1}
MODEL_TYPE = {
"Qwen/Qwen3-8B-Base": "vllm",
"Qwen/Qwen3-30B-A3B": "vllm",
"Qwen/Qwen2.5-VL-7B-Instruct": "vllm-vlm"
"Qwen/Qwen2.5-VL-7B-Instruct": "vllm-vlm",
}
# Command templates for running evaluations
MODEL_RUN_INFO = {
"Qwen/Qwen3-30B-A3B":
("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n"
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
),
"Qwen/Qwen3-8B-Base":
("export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
),
"Qwen/Qwen2.5-VL-7B-Instruct":
("export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
"lm_eval --model vllm-vlm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
"--apply_chat_template --fewshot_as_multiturn --batch_size 1"),
"Qwen/Qwen3-30B-A3B": (
"export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True'\n"
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
),
"Qwen/Qwen3-8B-Base": (
"export MODEL_ARGS='pretrained={model},max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6'\n"
"lm_eval --model vllm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
"--apply_chat_template --fewshot_as_multiturn --num_fewshot 5 --batch_size 1"
),
"Qwen/Qwen2.5-VL-7B-Instruct": (
"export MODEL_ARGS='pretrained={model},max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2'\n"
"lm_eval --model vllm-vlm --model_args $MODEL_ARGS --tasks {datasets} \ \n"
"--apply_chat_template --fewshot_as_multiturn --batch_size 1"
),
}
# Evaluation metric filters per task
FILTER = {
"gsm8k": "exact_match,flexible-extract",
"ceval-valid": "acc,none",
"mmmu_val": "acc,none"
"mmmu_val": "acc,none",
}
# Expected accuracy values for models
EXPECTED_VALUE = {
"Qwen/Qwen3-30B-A3B": {
"ceval-valid": 0.83,
"gsm8k": 0.85
},
"Qwen/Qwen3-8B-Base": {
"ceval-valid": 0.82,
"gsm8k": 0.83
},
"Qwen/Qwen2.5-VL-7B-Instruct": {
"mmmu_val": 0.51
}
"Qwen/Qwen3-30B-A3B": {"ceval-valid": 0.83, "gsm8k": 0.85},
"Qwen/Qwen3-8B-Base": {"ceval-valid": 0.82, "gsm8k": 0.83},
"Qwen/Qwen2.5-VL-7B-Instruct": {"mmmu_val": 0.51},
}
PARALLEL_MODE = {
"Qwen/Qwen3-8B-Base": "TP",
"Qwen/Qwen2.5-VL-7B-Instruct": "TP",
"Qwen/Qwen3-30B-A3B": "EP"
"Qwen/Qwen3-30B-A3B": "EP",
}
# Execution backend configuration
EXECUTION_MODE = {
"Qwen/Qwen3-8B-Base": "ACLGraph",
"Qwen/Qwen2.5-VL-7B-Instruct": "ACLGraph",
"Qwen/Qwen3-30B-A3B": "ACLGraph"
"Qwen/Qwen3-30B-A3B": "ACLGraph",
}
# Model arguments for evaluation
MODEL_ARGS = {
"Qwen/Qwen3-8B-Base":
"pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6",
"Qwen/Qwen2.5-VL-7B-Instruct":
"pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2",
"Qwen/Qwen3-30B-A3B":
"pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True"
"Qwen/Qwen3-8B-Base": "pretrained=Qwen/Qwen3-8B-Base,max_model_len=4096,dtype=auto,tensor_parallel_size=2,gpu_memory_utilization=0.6",
"Qwen/Qwen2.5-VL-7B-Instruct": "pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_model_len=8192,dtype=auto,tensor_parallel_size=2,max_images=2",
"Qwen/Qwen3-30B-A3B": "pretrained=Qwen/Qwen3-30B-A3B,max_model_len=4096,dtype=auto,tensor_parallel_size=4,gpu_memory_utilization=0.6,enable_expert_parallel=True",
}
# Whether to apply chat template formatting
APPLY_CHAT_TEMPLATE = {
"Qwen/Qwen3-8B-Base": True,
"Qwen/Qwen2.5-VL-7B-Instruct": True,
"Qwen/Qwen3-30B-A3B": False
"Qwen/Qwen3-30B-A3B": False,
}
# Few-shot examples handling as multi-turn dialogues.
FEWSHOT_AS_MULTITURN = {
"Qwen/Qwen3-8B-Base": True,
"Qwen/Qwen2.5-VL-7B-Instruct": True,
"Qwen/Qwen3-30B-A3B": False
"Qwen/Qwen3-30B-A3B": False,
}
# Relative tolerance for accuracy checks
@ -136,7 +126,7 @@ def run_accuracy_test(queue, model, dataset):
"tasks": dataset,
"apply_chat_template": APPLY_CHAT_TEMPLATE[model],
"fewshot_as_multiturn": FEWSHOT_AS_MULTITURN[model],
"batch_size": BATCH_SIZE[dataset]
"batch_size": BATCH_SIZE[dataset],
}
if MODEL_TYPE[model] == "vllm":
@ -151,7 +141,7 @@ def run_accuracy_test(queue, model, dataset):
queue.put(e)
sys.exit(1)
finally:
if 'results' in locals():
if "results" in locals():
del results
gc.collect()
torch.npu.empty_cache()
@ -161,16 +151,15 @@ def run_accuracy_test(queue, model, dataset):
def generate_md(model_name, tasks_list, args, datasets):
"""Generate Markdown report with evaluation results"""
# Format the run command
run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name,
datasets=datasets)
run_cmd = MODEL_RUN_INFO[model_name].format(model=model_name, datasets=datasets)
model = model_name.split("/")[1]
# Version information section
version_info = (
f"**vLLM Version**: vLLM: {args.vllm_version} "
f"([{args.vllm_commit}]({VLLM_URL+args.vllm_commit})), "
f"([{args.vllm_commit}]({VLLM_URL + args.vllm_commit})), "
f"vLLM Ascend: {args.vllm_ascend_version} "
f"([{args.vllm_ascend_commit}]({VLLM_ASCEND_URL+args.vllm_ascend_commit})) "
f"([{args.vllm_ascend_commit}]({VLLM_ASCEND_URL + args.vllm_ascend_commit})) "
)
# Report header with system info
@ -218,21 +207,39 @@ def generate_md(model_name, tasks_list, args, datasets):
else:
n_shot = "0"
flag = ACCURACY_FLAG.get(task_name, "")
row = (f"| {task_name:<37} "
f"| {flt:<6} "
f"| {n_shot:6} "
f"| {metric:<6} "
f"| {flag}{value:>5.4f} "
f"| ± {stderr:>5.4f} |")
row = (
f"| {task_name:<37} "
f"| {flt:<6} "
f"| {n_shot:6} "
f"| {metric:<6} "
f"| {flag}{value:>5.4f} "
f"| ± {stderr:>5.4f} |"
)
if not task_name.startswith("-"):
rows.append(row)
rows_sub.append("<details>" + "\n" + "<summary>" + task_name +
" details" + "</summary>" + "\n" * 2 + header)
rows_sub.append(
"<details>"
+ "\n"
+ "<summary>"
+ task_name
+ " details"
+ "</summary>"
+ "\n" * 2
+ header
)
rows_sub.append(row)
rows_sub.append("</details>")
# Combine all Markdown sections
md = preamble + "\n" + header + "\n" + "\n".join(rows) + "\n" + "\n".join(
rows_sub) + "\n"
md = (
preamble
+ "\n"
+ header
+ "\n"
+ "\n".join(rows)
+ "\n"
+ "\n".join(rows_sub)
+ "\n"
)
print(md)
return md
@ -262,8 +269,9 @@ def main(args):
# Evaluate model on each dataset
for dataset in datasets:
accuracy_expected = EXPECTED_VALUE[args.model][dataset]
p = multiprocessing.Process(target=run_accuracy_test,
args=(result_queue, args.model, dataset))
p = multiprocessing.Process(
target=run_accuracy_test, args=(result_queue, args.model, dataset)
)
p.start()
p.join()
if p.is_alive():
@ -274,8 +282,11 @@ def main(args):
time.sleep(10)
result = result_queue.get()
print(result)
if accuracy_expected - RTOL < result[dataset][
FILTER[dataset]] < accuracy_expected + RTOL:
if (
accuracy_expected - RTOL
< result[dataset][FILTER[dataset]]
< accuracy_expected + RTOL
):
ACCURACY_FLAG[dataset] = ""
else:
ACCURACY_FLAG[dataset] = ""
@ -285,10 +296,11 @@ def main(args):
if __name__ == "__main__":
multiprocessing.set_start_method('spawn', force=True)
multiprocessing.set_start_method("spawn", force=True)
# Initialize argument parser
parser = argparse.ArgumentParser(
description="Run model accuracy evaluation and generate report")
description="Run model accuracy evaluation and generate report"
)
parser.add_argument("--output", type=str, required=True)
parser.add_argument("--model", type=str, required=True)
parser.add_argument("--vllm_ascend_version", type=str, required=False)


@ -12,38 +12,55 @@ Theoretically, the vllm-ascend build is only supported on Linux because
But you can still set up a dev env on Linux/Windows/macOS for linting and basic
tests with the following commands:
#### Run lint locally
```bash
# Choose a base dir (~/vllm-project/) and set up venv
cd ~/vllm-project/
python3 -m venv .venv
source ./.venv/bin/activate
# Clone vllm code and install
git clone https://github.com/vllm-project/vllm.git
# Clone vllm-ascend and install
git clone https://github.com/vllm-project/vllm-ascend.git
cd vllm-ascend
# Install lint requirements and enable the pre-commit hook
pip install -r requirements-lint.txt
# Run lint (the first run installs pre-commit hook deps, which may need a proxy network)
bash format.sh
```
#### Run CI locally
After completing the "Run lint" setup, you can run CI locally:
```{code-block} bash
:substitutions:
cd ~/vllm-project/
# Running CI locally requires vLLM installed
git clone --branch |vllm_version| https://github.com/vllm-project/vllm.git
cd vllm
pip install -r requirements/build.txt
VLLM_TARGET_DEVICE="empty" pip install .
cd ..
# Clone vllm-ascend and install
git clone https://github.com/vllm-project/vllm-ascend.git
# Install requirements
cd vllm-ascend
# Install system requirements
apt install -y gcc g++ cmake libnuma-dev
# Install project requirements
# For Linux:
pip install -r requirements-dev.txt
# For non-Linux:
cat requirements-dev.txt | grep -Ev '^#|^--|^$|^-r' | while read PACKAGE; do pip install "$PACKAGE"; done
cat requirements.txt | grep -Ev '^#|^--|^$|^-r' | while read PACKAGE; do pip install "$PACKAGE"; done
# Then you can run lint and mypy tests
bash format.sh
# Run CI checks:
bash format.sh ci
```
# Build:
# - only supported on Linux (torch_npu available)
# pip install -e .
# - build without deps for debugging in other OS
# pip install -e . --no-deps
# - build without custom ops
# COMPILE_CUSTOM_KERNELS=0 pip install -e .
#### Submit the commit
```bash
# Commit changed files using `-s`
git commit -sm "your commit info"
```


@ -1,6 +1,6 @@
# Additional Configuration
addintional configuration is a mechanism provided by vLLM to allow plugins to control inner behavior by their own. vLLM Ascend uses this mechanism to make the project more flexible.
additional configuration is a mechanism provided by vLLM to allow plugins to control inner behavior by their own. vLLM Ascend uses this mechanism to make the project more flexible.
## How to use

format.sh

@ -19,325 +19,26 @@
# Adapted from https://github.com/vllm-project/vllm/tree/main/tools
#
# YAPF formatter, adapted from ray and skypilot.
#
# Usage:
# # Do work and commit your work.
# # Format files that differ from origin/main.
# bash format.sh
# # Commit changed files with message 'Run yapf and ruff'
#
#
# YAPF + Clang formatter (if installed). This script formats all changed files from the last mergebase.
# You are encouraged to run this locally before pushing changes for review.
# Cause the script to exit if a single command fails
set -eo pipefail
# this stops git rev-parse from failing if we run this from the .git directory
builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
ROOT="$(git rev-parse --show-toplevel)"
builtin cd "$ROOT" || exit 1
check_command() {
if ! command -v "$1" &> /dev/null; then
echo "❓❓$1 is not installed, please run \`pip install -r requirements-lint.txt\`"
echo "❓❓$1 is not installed, please run:"
echo "# Install lint deps"
echo "pip install -r requirements-lint.txt"
echo "# (optional) Enable git commit pre check"
echo "pre-commit install"
echo ""
echo "See step by step contribution guide:"
echo "https://vllm-ascend.readthedocs.io/en/latest/developer_guide/contribution"
exit 1
fi
}
check_command yapf
check_command ruff
check_command mypy
check_command codespell
check_command isort
check_command clang-format
check_command pre-commit
YAPF_VERSION=$(yapf --version | awk '{print $2}')
RUFF_VERSION=$(ruff --version | awk '{print $2}')
MYPY_VERSION=$(mypy --version | awk '{print $2}')
CODESPELL_VERSION=$(codespell --version)
ISORT_VERSION=$(isort --vn)
CLANGFORMAT_VERSION=$(clang-format --version | awk '{print $3}')
SPHINX_LINT_VERSION=$(sphinx-lint --version | awk '{print $2}')
# params: tool name, tool version, required version
tool_version_check() {
expected=$(grep "$1" requirements-lint.txt | cut -d'=' -f3)
if [[ "$2" != "$expected" ]]; then
echo "❓❓Wrong $1 version installed: $expected is required, not $2."
exit 1
fi
}
tool_version_check "yapf" "$YAPF_VERSION"
tool_version_check "ruff" "$RUFF_VERSION"
tool_version_check "mypy" "$MYPY_VERSION"
tool_version_check "isort" "$ISORT_VERSION"
tool_version_check "codespell" "$CODESPELL_VERSION"
tool_version_check "clang-format" "$CLANGFORMAT_VERSION"
tool_version_check "sphinx-lint" "$SPHINX_LINT_VERSION"
YAPF_FLAGS=(
'--recursive'
'--parallel'
)
YAPF_EXCLUDES=(
'--exclude' 'build/**'
)
# Format specified files
format() {
yapf --in-place "${YAPF_FLAGS[@]}" "$@"
}
# Format files that differ from main branch. Ignores dirs that are not slated
# for autoformat yet.
format_changed() {
# The `if` guard ensures that the list of filenames is not empty, which
# could cause yapf to receive 0 positional arguments, making it hang
# waiting for STDIN.
#
# `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that
# exist on both branches.
MERGEBASE="$(git merge-base origin/main HEAD)"
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs -P 5 \
yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
fi
}
# Format all files
format_all() {
yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" .
}
echo 'vllm-ascend yapf:'
## This flag formats individual files. --files *must* be the first command line
## arg to use this option.
if [[ "$1" == '--files' ]]; then
format "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the
# entire python directory is formatted.
elif [[ "$1" == '--all' ]]; then
format_all
# TODO: cleanup SC exclude
export SHELLCHECK_OPTS="--exclude=SC2046,SC2006,SC2086"
if [[ "$1" != 'ci' ]]; then
pre-commit run --all-files
else
# Format only the files that changed in last commit.
format_changed
pre-commit run --all-files --hook-stage manual
fi
echo 'vllm-ascend yapf: Done'
# Run mypy
echo 'vllm-ascend mypy:'
tools/mypy.sh
echo 'vllm-ascend mypy: Done'
# If git diff returns a file that is in the skip list, the file may be checked anyway:
# https://github.com/codespell-project/codespell/issues/1915
# Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem
CODESPELL_EXCLUDES=(
'--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**'
)
CODESPELL_IGNORE_WORDS=(
'-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever'
)
# check spelling of specified files
spell_check() {
codespell "$@" "${CODESPELL_IGNORE_WORDS[@]}"
}
spell_check_all() {
codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
}
# Spelling check of files that differ from main branch.
spell_check_changed() {
# The `if` guard ensures that the list of filenames is not empty, which
# could cause ruff to receive 0 positional arguments, making it hang
# waiting for STDIN.
#
# `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
# exist on both branches.
MERGEBASE="$(git merge-base origin/main HEAD)"
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
codespell "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
codespell "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"
fi
}
echo 'vllm-ascend codespell:'
# Run Codespell
## This flag runs spell check of individual files. --files *must* be the first command line
## arg to use this option.
if [[ "$1" == '--files' ]]; then
spell_check "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the
# entire python directory is linted.
elif [[ "$1" == '--all' ]]; then
spell_check_all
else
# Check spelling only of the files that changed in last commit.
spell_check_changed
fi
echo 'vllm-ascend codespell: Done'
# Lint specified files
lint() {
ruff check "$@"
}
# Lint files that differ from main branch. Ignores dirs that are not slated
# for autolint yet.
lint_changed() {
# The `if` guard ensures that the list of filenames is not empty, which
# could cause ruff to receive 0 positional arguments, making it hang
# waiting for STDIN.
#
# `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
# exist on both branches.
MERGEBASE="$(git merge-base origin/main HEAD)"
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
ruff check
fi
}
echo 'vllm-ascend ruff:'
# Run Ruff
### This flag lints individual files. --files *must* be the first command line
### arg to use this option.
if [[ "$1" == '--files' ]]; then
lint "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the
# entire python directory is linted.
elif [[ "$1" == '--all' ]]; then
lint vllm tests
else
# Format only the files that changed in last commit.
lint_changed
fi
echo 'vllm-ascend ruff: Done'
# check spelling of specified files
isort_check() {
isort "$@"
}
isort_check_all(){
isort .
}
# Spelling check of files that differ from main branch.
isort_check_changed() {
# The `if` guard ensures that the list of filenames is not empty, which
# could cause ruff to receive 0 positional arguments, making it hang
# waiting for STDIN.
#
# `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that
# exist on both branches.
MERGEBASE="$(git merge-base origin/main HEAD)"
if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
isort
fi
}
echo 'vllm-ascend isort:'
# Run Isort
# This flag runs spell check of individual files. --files *must* be the first command line
# arg to use this option.
if [[ "$1" == '--files' ]]; then
isort_check "${@:2}"
# If `--all` is passed, then any further arguments are ignored and the
# entire python directory is linted.
elif [[ "$1" == '--all' ]]; then
isort_check_all
else
# Check spelling only of the files that changed in last commit.
isort_check_changed
fi
echo 'vllm-ascend isort: Done'
# Clang-format section
# Exclude some files for formatting because they are vendored
CLANG_FORMAT_EXCLUDES=(
'csrc/kernels/utils.h' 'csrc/kernels/pos_encoding_kernels.cpp' 'csrc/kernels/advance_step.cpp' 'csrc/kernels/get_masked_input_and_mask_kernel.cpp' 'csrc/torch_binding.cpp' 'csrc/ops.h'
)
# Format specified files with clang-format
clang_format() {
clang-format -i "$@"
}
# Format files that differ from main branch with clang-format.
clang_format_changed() {
# The `if` guard ensures that the list of filenames is not empty, which
# could cause clang-format to receive 0 positional arguments, making it hang
# waiting for STDIN.
#
# `diff-filter=ACM` and $MERGEBASE is to ensure we only format files that
# exist on both branches.
MERGEBASE="$(git merge-base origin/main HEAD)"
# Get the list of changed files, excluding the specified ones
changed_files=$(git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.h' '*.cpp' '*.cu' '*.cuh' | (grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") || echo -e))
if [ -n "$changed_files" ]; then
echo "$changed_files" | xargs -P 5 clang-format -i
fi
}
# Format all files with clang-format
clang_format_all() {
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
| grep -vFf <(printf "%s\n" "${CLANG_FORMAT_EXCLUDES[@]}") \
| xargs clang-format -i
}
# Run clang-format
if [[ "$1" == '--files' ]]; then
clang_format "${@:2}"
elif [[ "$1" == '--all' ]]; then
clang_format_all
else
clang_format_changed
fi
echo 'vllm-ascend clang-format: Done'
echo 'vllm-ascend actionlint:'
tools/actionlint.sh -color
echo 'vllm-ascend actionlint: Done'
echo 'vllm-ascend shellcheck:'
tools/shellcheck.sh
echo 'vllm-ascend shellcheck: Done'
echo 'excalidraw png check:'
tools/png-lint.sh
echo 'excalidraw png check: Done'
if ! git diff --quiet &>/dev/null; then
echo
echo "🔍🔍There are files changed by the format checker or by you that are not added and committed:"
git --no-pager diff --name-only
echo "🔍🔍Format checker passed, but please add, commit and push all the files above to include changes made by the format checker."
exit 1
else
echo "✨🎉 Format check passed! Congratulations! 🎉✨"
fi
# echo 'vLLM sphinx-lint:'
# tools/sphinx-lint.sh
# echo 'vLLM sphinx-lint: Done'
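
With this rewrite, format.sh reduces to a thin wrapper over pre-commit; per the script above, usage is:

```bash
# Local run: pre-commit stage hooks only
bash format.sh
# CI-equivalent run: also executes manual-stage hooks (mypy etc.)
bash format.sh ci
```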


@ -1,15 +1,8 @@
# formatting
yapf==0.32.0
toml==0.10.2
tomli==2.0.2
ruff==0.6.5
codespell==2.3.0
isort==5.13.2
clang-format==18.1.5
sphinx-lint==1.0.0
pre-commit==4.0.1
# type checking
mypy==1.15.0
mypy==1.11.1
types-PyYAML
types-requests
types-setuptools


@ -324,8 +324,10 @@ def test_stop_via_update_from_output():
model_output = ModelRunnerOutput(
req_ids=[req.request_id for req in requests],
req_id_to_index={req.request_id: i
for i, req in enumerate(requests)},
req_id_to_index={
req.request_id: i
for i, req in enumerate(requests)
},
sampled_token_ids=[[EOS_TOKEN_ID],
[10,
11]], # First request hits EOS, second continues
@ -374,8 +376,10 @@ def test_stop_via_update_from_output():
model_output = ModelRunnerOutput(
req_ids=[req.request_id for req in requests],
req_id_to_index={req.request_id: i
for i, req in enumerate(requests)},
req_id_to_index={
req.request_id: i
for i, req in enumerate(requests)
},
sampled_token_ids=[[10, 42, 12],
[13, 14]], # First request hits stop token
spec_token_ids=None,
@ -422,8 +426,10 @@ def test_stop_via_update_from_output():
model_output = ModelRunnerOutput(
req_ids=[req.request_id for req in requests],
req_id_to_index={req.request_id: i
for i, req in enumerate(requests)},
req_id_to_index={
req.request_id: i
for i, req in enumerate(requests)
},
sampled_token_ids=[[10, 11, 12],
[13]], # First request exceeds max_tokens
spec_token_ids=None,


@ -21,8 +21,8 @@ def get_masked_input_and_mask_ref(
added_vocab_start_index: int,
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
"""Reference implementation for verification"""
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ <
org_vocab_end_index)
org_vocab_mask = (input_ >= org_vocab_start_index) & (
input_ < org_vocab_end_index)
added_vocab_mask = (input_ >= added_vocab_start_index) & (
input_ < added_vocab_end_index)
added_offset = added_vocab_start_index - (


@ -394,8 +394,8 @@ def test_rejection_sampling_approximates_target_distribution():
distance_wrt_reference)
expected_improvement_multiplier = 20
assert (relative_change_in_distance_wrt_target >
relative_change_in_distance_wrt_reference *
assert (relative_change_in_distance_wrt_target
> relative_change_in_distance_wrt_reference *
expected_improvement_multiplier)


@ -231,8 +231,10 @@ def test_stop_via_update_from_output():
model_output = ModelRunnerOutput(
req_ids=[req.request_id for req in requests],
req_id_to_index={req.request_id: i
for i, req in enumerate(requests)},
req_id_to_index={
req.request_id: i
for i, req in enumerate(requests)
},
sampled_token_ids=[[EOS_TOKEN_ID],
[10,
11]], # First request hits EOS, second continues
@ -279,8 +281,10 @@ def test_stop_via_update_from_output():
model_output = ModelRunnerOutput(
req_ids=[req.request_id for req in requests],
req_id_to_index={req.request_id: i
for i, req in enumerate(requests)},
req_id_to_index={
req.request_id: i
for i, req in enumerate(requests)
},
sampled_token_ids=[[10, 42, 12],
[13, 14]], # First request hits stop token
spec_token_ids=None,
@ -325,8 +329,10 @@ def test_stop_via_update_from_output():
model_output = ModelRunnerOutput(
req_ids=[req.request_id for req in requests],
req_id_to_index={req.request_id: i
for i, req in enumerate(requests)},
req_id_to_index={
req.request_id: i
for i, req in enumerate(requests)
},
sampled_token_ids=[[10, 11, 12],
[13]], # First request exceeds max_tokens
spec_token_ids=None,


@ -0,0 +1,104 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
# Adapted from https://github.com/vllm-project/vllm/tree/main/tools
#
from __future__ import annotations
import subprocess
from pathlib import Path
import regex as re
FORBIDDEN_PATTERNS = re.compile(
r'^\s*(?:import\s+re(?:$|\s|,)|from\s+re\s+import)')
ALLOWED_PATTERNS = [
re.compile(r'^\s*import\s+regex\s+as\s+re\s*$'),
re.compile(r'^\s*import\s+regex\s*$'),
]
def get_staged_python_files() -> list[str]:
try:
result = subprocess.run(
['git', 'diff', '--cached', '--name-only', '--diff-filter=AM'],
capture_output=True,
text=True,
check=True)
files = result.stdout.strip().split(
'\n') if result.stdout.strip() else []
return [f for f in files if f.endswith('.py')]
except subprocess.CalledProcessError:
return []
def is_forbidden_import(line: str) -> bool:
line = line.strip()
return bool(
FORBIDDEN_PATTERNS.match(line)
and not any(pattern.match(line) for pattern in ALLOWED_PATTERNS))
def check_file(filepath: str) -> list[tuple[int, str]]:
violations = []
try:
with open(filepath, encoding='utf-8') as f:
for line_num, line in enumerate(f, 1):
if is_forbidden_import(line):
violations.append((line_num, line.strip()))
except (OSError, UnicodeDecodeError):
pass
return violations
def main() -> int:
files = get_staged_python_files()
if not files:
return 0
total_violations = 0
for filepath in files:
if not Path(filepath).exists():
continue
if filepath == "setup.py":
continue
violations = check_file(filepath)
if violations:
print(f"\n{filepath}:")
for line_num, line in violations:
print(f" Line {line_num}: {line}")
total_violations += 1
if total_violations > 0:
print(f"\n💡 Found {total_violations} violation(s).")
print("❌ Please replace 'import re' with 'import regex as re'")
print(
" Also replace 'from re import ...' with 'from regex import ...'"
) # noqa: E501
print("✅ Allowed imports:")
print(" - import regex as re")
print(" - import regex") # noqa: E501
return 1
return 0
if __name__ == "__main__":
raise SystemExit(main())
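
A sketch of invoking this checker by hand; it inspects staged files, so stage a file first (the filename below is hypothetical):

```bash
# Stage a (hypothetical) file, then run the import checker
git add vllm_ascend/some_module.py
python tools/enforce_regex_import.py
```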


@ -20,12 +20,16 @@
#
CI=${1:-0}
PYTHON_VERSION=${2:-3.9}
PYTHON_VERSION=${2:-local}
if [ "$CI" -eq 1 ]; then
set -e
fi
if [ $PYTHON_VERSION == "local" ]; then
PYTHON_VERSION=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
fi
run_mypy() {
echo "Running mypy on $1"
mypy --check-untyped-defs --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
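
With this change, a usage sketch for tools/mypy.sh:

```bash
# Local: CI=0, Python version auto-detected from the active interpreter
tools/mypy.sh
# CI: fail fast (set -e) with an explicit interpreter version
tools/mypy.sh 1 3.10
```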


@ -39,3 +39,7 @@ if ! [ -x "$(command -v shellcheck)" ]; then
PATH="$PATH:$(pwd)/shellcheck-${scversion}"
export PATH
fi
# should enable this
# find . -path ./.git -prune -o -name "*.sh" -print0 \
# | xargs -0 -I {} sh -c 'git check-ignore -q "{}" || shellcheck -s bash "{}"'

typos.toml (new file)

@ -0,0 +1,177 @@
[files]
# these files may be written in non english words
extend-exclude = []
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true
[default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
extend-ignore-identifiers-re = [".*Unc.*", ".*_thw",
".*UE8M0.*", ".*[UE4M3|ue4m3].*", ".*eles.*", ".*fo.*", ".*ba.*",
".*ot.*", ".*[Tt]h[rR].*"]
extend-ignore-words-re = ["CANN", "cann"]
extend-ignore-re = []
[default.extend-identifiers]
nd_to_nz_2d = "nd_to_nz_2d"
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"
[default.extend-words]
iy = "iy"
tendencias = "tendencias"
# intel cpu features
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"
[type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
[type.py.extend-words]
[type.cpp]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.cpp.extend-identifiers]
countr_one = "countr_one"
[type.cpp.extend-words]
[type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.rust.extend-identifiers]
flate2 = "flate2"
[type.rust.extend-words]
ser = "ser"
[type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.lock.extend-identifiers]
[type.lock.extend-words]
[type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.jl.extend-identifiers]
[type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"
[type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.go.extend-identifiers]
flate = "flate"
[type.go.extend-words]
[type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.css.extend-identifiers]
nd = "nd"
[type.css.extend-words]
[type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.man.extend-identifiers]
Nd = "Nd"
[type.man.extend-words]
[type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.cert.extend-identifiers]
[type.cert.extend-words]
[type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.sh.extend-identifiers]
stap = "stap"
ot = "ot"
[type.sh.extend-words]
[type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []
[type.vimscript.extend-identifiers]
windo = "windo"
[type.vimscript.extend-words]


@ -232,7 +232,7 @@ class AscendScheduler(Scheduler):
token_budget -= num_new_tokens
request.status = RequestStatus.RUNNING
request.num_computed_tokens = num_computed_tokens
# Count the number of prifix cached tokens.
# Count the number of prefix cached tokens.
if request.num_cached_tokens < 0:
request.num_cached_tokens = num_computed_tokens


@ -199,8 +199,11 @@ class SimpleConnector(KVConnectorBase):
model_executable: torch.nn.Module,
model_input: "ModelInputForGPUWithSamplingMetadata",
kv_caches: List[torch.Tensor],
) -> Tuple[Union[torch.Tensor, IntermediateTensors], bool,
"ModelInputForGPUWithSamplingMetadata", ]:
) -> Tuple[
Union[torch.Tensor, IntermediateTensors],
bool,
"ModelInputForGPUWithSamplingMetadata",
]:
bypass_model_exec = True
model_config = self.model_config


@ -108,7 +108,8 @@ class CustomDeepSeekMultiTokenPredictor(DeepSeekMultiTokenPredictor):
self.num_mtp_layers = config.num_nextn_predict_layers
# to map the exact layer index from weights
self.layers = torch.nn.ModuleDict({
str(idx): CustomDeepSeekMultiTokenPredictorLayer(
str(idx):
CustomDeepSeekMultiTokenPredictorLayer(
config,
f"{prefix}.layers.{idx}",
model_config=vllm_config.model_config,


@ -79,8 +79,9 @@ def process_topk_ids(topk_ids: torch.Tensor, expert_num: int, ep_size: int,
experts_per_ep_rank_val).to(original_dtype)
indices_arange = torch.arange(topk_ids.shape[0], device=device)
is_new_segment = torch.cat((torch.tensor([True], device=device),
assigned_ep_rank[1:] != assigned_ep_rank[:-1]))
is_new_segment = torch.cat(
(torch.tensor([True], device=device), assigned_ep_rank[1:]
!= assigned_ep_rank[:-1]))
temp_start_markers = torch.full_like(indices_arange,
-1,
dtype=indices_arange.dtype)
@ -469,13 +470,13 @@ def fused_experts_with_all2all_buffer(
expert_idx_buffer_scatter.shape,
dtype=expert_idx_buffer_scatter.dtype,
device=expert_idx_buffer_scatter.device)
non_pad_len = torch.sum(
(expert_idx_buffer_scatter != global_num_experts).to(torch.int32))
hidden_states_pad_idx[
expert_idx_buffer_scatter != global_num_experts] = torch.arange(
non_pad_len,
dtype=expert_idx_buffer_scatter.dtype,
device=hidden_states.device)
non_pad_len = torch.sum((expert_idx_buffer_scatter
!= global_num_experts).to(torch.int32))
hidden_states_pad_idx[expert_idx_buffer_scatter !=
global_num_experts] = torch.arange(
non_pad_len,
dtype=expert_idx_buffer_scatter.dtype,
device=hidden_states.device)
hidden_states_buffer_scatter = hidden_states[hidden_states_pad_idx]
expert_idx_buffer_gather = torch.empty_like(
@ -528,8 +529,8 @@ def fused_experts_with_all2all_buffer(
dist.all_to_all_single(hidden_states_gatter,
hidden_states_scatter,
group=ep_group.device_group)
hidden_states_gatter = hidden_states_gatter[
expert_idx_buffer_scatter != global_num_experts]
hidden_states_gatter = hidden_states_gatter[expert_idx_buffer_scatter !=
global_num_experts]
if hidden_states_gatter.shape[0] != row_idx_len:
hidden_states = torch.zeros((row_idx_len, hidden_states.shape[1]),
dtype=hidden_states.dtype,


@ -30,8 +30,8 @@ def get_masked_input_and_mask(
added_vocab_end_index: int) -> Tuple[torch.Tensor, torch.Tensor]:
# torch.compile will fuse all of the pointwise ops below
# into a single kernel, making it very fast
org_vocab_mask = (input_ >= org_vocab_start_index) & (input_ <
org_vocab_end_index)
org_vocab_mask = (input_ >= org_vocab_start_index) & (
input_ < org_vocab_end_index)
added_vocab_mask = (input_ >= added_vocab_start_index) & (
input_ < added_vocab_end_index)
added_offset = added_vocab_start_index - (


@ -880,8 +880,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
assert total_num_scheduled_tokens > 0
num_reqs = self.input_batch.num_reqs
assert num_reqs > 0
if (self.use_aclgraph and
total_num_scheduled_tokens <= self.aclgraph_batch_sizes[-1]):
if (self.use_aclgraph and total_num_scheduled_tokens
<= self.aclgraph_batch_sizes[-1]):
# Add padding to the batch size.
num_input_tokens = self.vllm_config.pad_for_cudagraph(
total_num_scheduled_tokens)