diff --git a/benchmarks/benchmark_ngram_proposer.py b/benchmarks/benchmark_ngram_proposer.py
index 291d87d608..626b150ee4 100644
--- a/benchmarks/benchmark_ngram_proposer.py
+++ b/benchmarks/benchmark_ngram_proposer.py
@@ -164,7 +164,7 @@ def invoke_main() -> None:
     )
     parser.add_argument(
         "--batched", action="store_true", help="consider time to prepare batch"
-    )  # noqa: E501
+    )
     parser.add_argument(
         "--num-iteration",
         type=int,
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index f6b48ad524..58b9767d09 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -909,13 +909,13 @@ def create_argument_parser():
     parser.add_argument(
         "--tokenizer",
         type=str,
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--tokenizer-mode",
         type=str,
         default="auto",
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--num-prompts",
diff --git a/csrc/cutlass_extensions/vllm_cutlass_library_extension.py b/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
index 7a81dd40c8..5e742d0b02 100644
--- a/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
+++ b/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
@@ -72,8 +72,8 @@ VLLMKernelScheduleTag: dict[
 ] = {
     **KernelScheduleTag,  # type: ignore
     **{
-        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",
-        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",
-        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",
+        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
     },
 }
diff --git a/examples/offline_inference/vision_language_pooling.py b/examples/offline_inference/vision_language_pooling.py
index 6f8679918c..33ffb59014 100644
--- a/examples/offline_inference/vision_language_pooling.py
+++ b/examples/offline_inference/vision_language_pooling.py
@@ -113,7 +113,7 @@ def run_e5_v(query: Query) -> ModelRequestData:
 def _get_vlm2vec_prompt_image(query: Query, image_token: str):
     if query["modality"] == "text":
         text = query["text"]
-        prompt = f"Find me an everyday image that matches the given caption: {text}"  # noqa: E501
+        prompt = f"Find me an everyday image that matches the given caption: {text}"
         image = None
     elif query["modality"] == "image":
         prompt = f"{image_token} Find a day-to-day image that looks similar to the provided image."  # noqa: E501
diff --git a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
index d39edb0b9d..1df11d9d84 100644
--- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
+++ b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
@@ -203,9 +203,9 @@ class Proxy:
                 async with session.post(
                     url=url, json=data, headers=headers
                 ) as response:
-                    if 200 <= response.status < 300 or 400 <= response.status < 500:  # noqa: E501
+                    if 200 <= response.status < 300 or 400 <= response.status < 500:
                         if use_chunked:
-                            async for chunk_bytes in response.content.iter_chunked(  # noqa: E501
+                            async for chunk_bytes in response.content.iter_chunked(
                                 1024
                             ):
                                 yield chunk_bytes
diff --git a/pyproject.toml b/pyproject.toml
index b3cae3d00c..704f28fa65 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,52 +56,6 @@ include = ["vllm*"]
 "vllm/third_party/**" = ["ALL"]
 "vllm/version.py" = ["F401"]
 "vllm/_version.py" = ["ALL"]
-# TEMPORARY! These ignores will be fixed forward
-## Line length violations
-"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
-"tests/compile/piecewise/test_simple.py" = ["E501"]
-"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
-"tests/entrypoints/conftest.py" = ["E501"]
-"tests/entrypoints/openai/test_audio.py" = ["E501"]
-"tests/entrypoints/openai/test_chat.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
-"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
-"tests/entrypoints/openai/test_video.py" = ["E501"]
-"tests/entrypoints/openai/test_vision.py" = ["E501"]
-"tests/entrypoints/test_chat_utils.py" = ["E501"]
-"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
-"tests/models/language/generation/test_gemma.py" = ["E501"]
-"tests/models/language/generation/test_mistral.py" = ["E501"]
-"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
-"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
-"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
-"tests/tool_use/test_tool_choice_required.py" = ["E501"]
-"tests/v1/attention/utils.py" = ["E501"]
-"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
-"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
-"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
-"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
-"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
-"vllm/compilation/collective_fusion.py" = ["E501"]
-"vllm/compilation/wrapper.py" = ["E501"]
-"vllm/config/vllm.py" = ["E501"]
-"vllm/distributed/device_communicators/all2all.py" = ["E501"]
-"vllm/entrypoints/openai/protocol.py" = ["E501"]
-"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
-"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
-"vllm/model_executor/models/bailing_moe.py" = ["E501"]
-"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
-"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
-"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
-"vllm/model_executor/models/phi4mm.py" = ["E501"]
-"vllm/model_executor/models/qwen3_next.py" = ["E501"]
-"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
-"vllm/v1/attention/backends/mla/common.py" = ["E501"]
-"vllm/v1/engine/utils.py" = ["E501"]
-"vllm/v1/utils.py" = ["E501"]
-"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
-# End of temporary ignores
 
 [tool.ruff.lint]
 select = [
diff --git a/tests/compile/piecewise/test_simple.py b/tests/compile/piecewise/test_simple.py
index ea80c43c9d..920cd5a06c 100644
--- a/tests/compile/piecewise/test_simple.py
+++ b/tests/compile/piecewise/test_simple.py
@@ -132,10 +132,14 @@ def test_simple_piecewise_compile(use_inductor):
         splitting_ops=["silly.attention"],
         use_inductor_graph_partition=False,
         use_inductor=use_inductor,
-        expected_num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-        expected_num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-        expected_num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-        expected_num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # 2 * num_layers + 1
+        expected_num_piecewise_graphs_seen=5,
+        # 1 + num_layers
+        expected_num_piecewise_capturable_graphs_seen=3,
+        # num_piecewise_capturable_graphs_seen
+        expected_num_backend_compilations=3,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        expected_num_cudagraph_captured=6,
     )
@@ -147,14 +151,16 @@ def test_simple_inductor_graph_partition(splitting_ops):
         pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
     _run_simple_model(
-        # inductor graph partition automatically resets splitting_ops
-        # to be an empty list
+        # Inductor graph partition automatically resets splitting_ops to an empty list
         splitting_ops=splitting_ops,
         use_inductor_graph_partition=True,
         use_inductor=True,
-        expected_num_piecewise_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_piecewise_capturable_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_backend_compilations=1,  # since not splitting at fx graph level
-        expected_num_cudagraph_captured=6,  # inductor graph partition still captures 6
-        # graph, same as fx graph partition.
+        # Since not splitting at fx graph level
+        expected_num_piecewise_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_piecewise_capturable_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_backend_compilations=1,
+        # Inductor graph partition still captures 6 graphs, same as fx graph partition
+        expected_num_cudagraph_captured=6,
     )
diff --git a/tests/compile/piecewise/test_toy_llama.py b/tests/compile/piecewise/test_toy_llama.py
index 46b9e4bf8d..e053367fb3 100644
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
@@ -367,11 +367,14 @@ def test_toy_llama(use_inductor: bool):
         kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}
     with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
+        # One graph for the model
+        num_graphs_seen=1,
         num_piecewise_graphs_seen=1,
         num_piecewise_capturable_graphs_seen=1,
-        num_backend_compilations=1,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2,
         **kwargs,
     ):
         outputs.append(
@@ -478,9 +481,10 @@ def benchmark():
             # it is fine here, because we only use the lambda function once.
runtime = do_bench( lambda: graphs[b][0]( # noqa - input_ids[:b], positions[:b] + input_ids[:b], # noqa + positions[:b], # noqa ) - ) # noqa + ) piecewise_cudagraph_time[b] = runtime else: runtime = do_bench(lambda: graphs[b][0].replay()) # noqa diff --git a/tests/compile/test_functionalization.py b/tests/compile/test_functionalization.py index 95e92a8780..ae17bc67b1 100644 --- a/tests/compile/test_functionalization.py +++ b/tests/compile/test_functionalization.py @@ -243,7 +243,7 @@ def test_fix_functionalization(model_class: torch.nn.Module, do_fusion: bool): # check if the functionalization pass is applied for op in model.ops_in_model(do_fusion): find_auto_fn(backend_no_func.graph_post_pass.nodes, op) - assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # noqa: E501 + assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # make sure the ops were all de-functionalized found = dict() diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index 25b3f16bd9..1fd5c26765 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -565,7 +565,7 @@ def test_attention_quant_pattern( elif quant_key.dtype == FP4_DTYPE: assert attn_nodes_post[0].kwargs.get("output_block_scale") is not None, ( "Attention should have output_block_scale after FP4 fusion" - ) # noqa: E501 + ) # Check that results are close torch.testing.assert_close(result_unfused, result_fused_1, atol=1e-2, rtol=1e-2) diff --git a/tests/compile/test_sequence_parallelism.py b/tests/compile/test_sequence_parallelism.py index 1d14a89c3a..afb31cb95b 100644 --- a/tests/compile/test_sequence_parallelism.py +++ b/tests/compile/test_sequence_parallelism.py @@ -186,7 +186,7 @@ class TestQuantModel(torch.nn.Module): ): # If fusion happens, the fused op is the one # we check for (de)functionalization - return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default] # noqa: E501 + return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default] else: # If no fusion, the original ops are checked return [ @@ -322,7 +322,7 @@ def sequence_parallelism_pass_on_test_model( # check if the functionalization pass is applied for op in model.ops_in_model(): find_auto_fn(backend_no_func.graph_post_pass.nodes, op) - assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # noqa: E501 + assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # make sure the ops were all de-functionalized found = dict() diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index 119e8e7621..7d55c40754 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -104,7 +104,7 @@ TEXT_GENERATION_MODELS = { # [Decoder-only] # Uses Llama # "BAAI/AquilaChat-7B": PPTestSettings.fast(), - "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"), # noqa: E501 + "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"), "baichuan-inc/Baichuan-7B": PPTestSettings.fast(), "baichuan-inc/Baichuan2-13B-Chat": PPTestSettings.fast(), "bigscience/bloomz-1b1": PPTestSettings.fast(), @@ -138,7 +138,7 @@ TEXT_GENERATION_MODELS = { # Uses Llama # "mistralai/Mistral-7B-Instruct-v0.1": PPTestSettings.fast(), "state-spaces/mamba-130m-hf": PPTestSettings.fast(), - "mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(load_format="dummy"), # noqa: E501 + "mistralai/Mixtral-8x7B-Instruct-v0.1": 
PPTestSettings.fast(load_format="dummy"), "mosaicml/mpt-7b": PPTestSettings.fast(), "nvidia/Minitron-8B-Base": PPTestSettings.fast(), "allenai/OLMo-1B-hf": PPTestSettings.fast(), @@ -151,13 +151,13 @@ TEXT_GENERATION_MODELS = { "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(), "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed( multi_node_only=True, load_format="dummy" - ), # noqa: E501 + ), "Qwen/Qwen-7B-Chat": PPTestSettings.fast(), "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(), "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(), "stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(), "bigcode/starcoder2-3b": PPTestSettings.fast(), - "upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"), # noqa: E501 + "upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"), # FIXME: Cannot load tokenizer in latest transformers version. # Need to use tokenizer from `meta-llama/Llama-2-7b-chat-hf` # "xverse/XVERSE-7B-Chat": PPTestSettings.fast(), diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py index e03d34ac8f..a52e1cb7df 100644 --- a/tests/entrypoints/conftest.py +++ b/tests/entrypoints/conftest.py @@ -83,7 +83,8 @@ def sample_complex_json_schema(): "type": "array", "items": { "type": "string", - "pattern": "^[a-z]{1,10}$", # Combining length and pattern restrictions + # Combining length and pattern restrictions + "pattern": "^[a-z]{1,10}$", }, }, }, diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index f3e89f48f9..a96f0134c2 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -145,7 +145,7 @@ async def test_single_chat_session_audio_base64encoded( { "type": "audio_url", "audio_url": { - "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}" + "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's happening in this audio?"}, diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index e1d5971052..ed0b284bda 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -835,17 +835,18 @@ async def test_extra_fields_allowed(client: openai.AsyncOpenAI): @pytest.mark.asyncio async def test_complex_message_content(client: openai.AsyncOpenAI): + content = [ + { + "type": "text", + "text": "what is 1+1? please provide the result without any other text.", + } + ] resp = await client.chat.completions.create( model=MODEL_NAME, messages=[ { "role": "user", - "content": [ - { - "type": "text", - "text": "what is 1+1? 
please provide the result without any other text.", - } - ], + "content": content, } ], temperature=0, diff --git a/tests/entrypoints/openai/test_chat_template.py b/tests/entrypoints/openai/test_chat_template.py index adaeb96fbf..d1202a5975 100644 --- a/tests/entrypoints/openai/test_chat_template.py +++ b/tests/entrypoints/openai/test_chat_template.py @@ -76,8 +76,8 @@ def test_load_chat_template(): assert ( template_content == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %} -{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" - ) # noqa: E501 +{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" # noqa: E501 + ) def test_no_load_chat_template_filelike(): diff --git a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py index 4f23eee462..e452b578ba 100644 --- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py +++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py @@ -45,12 +45,13 @@ TOOLS = [ "properties": { "city": { "type": "string", - "description": "The city to find the weather for, e.g. 'San Francisco'", + "description": "The city to find the weather for, e.g. " + "'San Francisco'", }, "state": { "type": "string", - "description": "the two-letter abbreviation for the state that the city is" - " in, e.g. 'CA' which would mean 'California'", + "description": "the two-letter abbreviation for the state that " + "the city is in, e.g. 'CA' which would mean 'California'", }, "unit": { "type": "string", @@ -69,7 +70,8 @@ MESSAGES = [ {"role": "assistant", "content": "I'm doing well! How can I help you?"}, { "role": "user", - "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?", + "content": "Can you tell me what the temperate will be in Dallas, " + "in fahrenheit?", }, ] diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py index 599d65187e..e64f68cad7 100644 --- a/tests/entrypoints/openai/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/test_completion_with_function_calling.py @@ -25,12 +25,14 @@ tools = [ "properties": { "city": { "type": "string", - "description": "The city to find the weather for, e.g. 'Vienna'", + "description": "The city to find the weather for, e.g. " + "'Vienna'", "default": "Vienna", }, "country": { "type": "string", - "description": "The country that the city is in, e.g. 'Austria'", + "description": "The country that the city is in, e.g. " + "'Austria'", }, "unit": { "type": "string", @@ -85,12 +87,14 @@ tools = [ "properties": { "city": { "type": "string", - "description": "The city to get the forecast for, e.g. 'Vienna'", + "description": "The city to get the forecast for, e.g. " + "'Vienna'", "default": "Vienna", }, "country": { "type": "string", - "description": "The country that the city is in, e.g. 'Austria'", + "description": "The country that the city is in, e.g. 
" + "'Austria'", }, "days": { "type": "integer", diff --git a/tests/entrypoints/openai/test_video.py b/tests/entrypoints/openai/test_video.py index a0d118743e..4c7d1c14ca 100644 --- a/tests/entrypoints/openai/test_video.py +++ b/tests/entrypoints/openai/test_video.py @@ -179,7 +179,7 @@ async def test_single_chat_session_video_base64encoded( { "type": "video_url", "video_url": { - "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" + "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's in this video?"}, @@ -238,7 +238,7 @@ async def test_single_chat_session_video_base64encoded_beamsearch( { "type": "video_url", "video_url": { - "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" + "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's in this video?"}, diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index cab23d1e2f..5a15a352f4 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -233,7 +233,7 @@ async def test_single_chat_session_image_base64encoded( { "type": "image_url", "image_url": { - "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" + "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" # noqa: E501 }, }, {"type": "text", "text": content_text}, @@ -300,7 +300,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch( { "type": "image_url", "image_url": { - "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" + "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's in this image?"}, diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 975ca53a3a..6e92419c4f 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -947,7 +947,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt( {"type": "image_url", "image_url": {"url": image_url}}, { "type": "text", - "text": "What's in <|image_1|> and how does it compare to the other one?", # noqa: E501 + "text": "What's in <|image_1|> and how does it compare to " + "the other one?", }, ], } @@ -960,8 +961,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt( assert conversation == [ { "role": "user", - "content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to the " - "other one?", + "content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to " + "the other one?", } ] _assert_mm_data_is_image_input(mm_data, 2) @@ -1364,7 +1365,7 @@ def test_parse_chat_messages_multiple_images_multiple_messages_interleave( _assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, None]) -def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave( # noqa: E501 +def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave( phi3v_model_config_mm_interleaved, phi3v_tokenizer, image_url, @@ -1451,14 +1452,14 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave( assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this 
audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -1468,7 +1469,7 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave( _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[None]) -def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave( # noqa: E501 +def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave( qwen25omni_model_config_mm_interleaved, qwen25omni_tokenizer, image_url, @@ -1521,14 +1522,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interl assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -1593,14 +1594,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_empty_media_mes assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -1661,14 +1662,14 @@ def test_parse_chat_messages_multiple_modals_with_partial_uuids_multiple_message assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -2193,7 +2194,8 @@ def test_parse_chat_messages_single_empty_audio_with_uuid( assert conversation == [ { "role": "user", - "content": "Audio 
1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?", + "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the " + "audio say?", } ] _assert_mm_data_inputs(mm_data, {"audio": 1}) @@ -2228,7 +2230,8 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async( assert conversation == [ { "role": "user", - "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?", + "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the " + "audio say?", } ] _assert_mm_data_inputs(await mm_future, {"audio": 1}) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 6037424bde..6b99ba7af5 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -165,7 +165,7 @@ def test_env( # FlashMLA only supports block_size == 64 pytest.skip("FlashMLA only supports block_size 64") else: - from vllm.v1.attention.backends.mla.flashmla import ( # noqa: E501 + from vllm.v1.attention.backends.mla.flashmla import ( is_flashmla_supported, ) diff --git a/tests/kernels/moe/modular_kernel_tools/common.py b/tests/kernels/moe/modular_kernel_tools/common.py index 903f13e444..091fa4fafe 100644 --- a/tests/kernels/moe/modular_kernel_tools/common.py +++ b/tests/kernels/moe/modular_kernel_tools/common.py @@ -331,7 +331,8 @@ class WeightTensors: in_dtype=config.dtype, quant_dtype=config.quant_dtype, block_shape=config.quant_block_shape, - per_out_ch_quant=config.is_per_act_token_quant, # or config.is_per_out_ch_quant + # or config.is_per_out_ch_quant + per_out_ch_quant=config.is_per_act_token_quant, ) return WeightTensors( w1=w1, w2=w2, w1_scale=w1_scale, w2_scale=w2_scale, w1_gs=w1_gs, w2_gs=w2_gs diff --git a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py index 9040db017f..0ef306051c 100644 --- a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py +++ b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py @@ -124,7 +124,7 @@ def make_feature_matrix(csv_file_path: str): results_df: Optional[pd.DataFrame] = None for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm( combinations - ): # noqa: E501 + ): config = Config( Ms=[m], K=k, diff --git a/tests/kernels/moe/modular_kernel_tools/mk_objects.py b/tests/kernels/moe/modular_kernel_tools/mk_objects.py index d4b42ed8ea..566fb1e09d 100644 --- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py +++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py @@ -10,7 +10,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( BatchedDeepGemmExperts, ) -from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( # noqa: E501 +from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( BatchedTritonOrDeepGemmExperts, ) from vllm.model_executor.layers.fused_moe.config import ( @@ -196,10 +196,10 @@ register_experts( # Disable on blackwell for now if has_deep_ep() and not current_platform.has_device_capability(100): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from 
vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) @@ -233,7 +233,7 @@ if has_pplx(): ) if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100): - from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( FlashInferExperts, ) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 diff --git a/tests/kernels/moe/parallel_utils.py b/tests/kernels/moe/parallel_utils.py index 9d087ad13b..fb9e5df281 100644 --- a/tests/kernels/moe/parallel_utils.py +++ b/tests/kernels/moe/parallel_utils.py @@ -17,10 +17,10 @@ from typing_extensions import Concatenate, ParamSpec from vllm.utils import get_open_port, has_deep_ep if has_deep_ep(): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) diff --git a/tests/kernels/moe/test_deepep_deepgemm_moe.py b/tests/kernels/moe/test_deepep_deepgemm_moe.py index cfa752d6ea..e68c5bfa59 100644 --- a/tests/kernels/moe/test_deepep_deepgemm_moe.py +++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py @@ -30,10 +30,10 @@ from .parallel_utils import ProcessGroupInfo, parallel_launch from .utils import make_test_weights if has_deep_ep(): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) diff --git a/tests/kernels/moe/test_deepep_moe.py b/tests/kernels/moe/test_deepep_moe.py index f98e4407a0..a1dabea1f0 100644 --- a/tests/kernels/moe/test_deepep_moe.py +++ b/tests/kernels/moe/test_deepep_moe.py @@ -28,10 +28,10 @@ from ...utils import multi_gpu_test from .parallel_utils import ProcessGroupInfo, parallel_launch if has_deep_ep(): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) diff --git a/tests/kernels/moe/test_modular_kernel_combinations.py b/tests/kernels/moe/test_modular_kernel_combinations.py index fdd46d7e1b..9c41145235 100644 --- a/tests/kernels/moe/test_modular_kernel_combinations.py +++ b/tests/kernels/moe/test_modular_kernel_combinations.py @@ -271,7 +271,7 @@ if __name__ == "__main__": parser = make_config_arg_parser( description=( "Run single prepare-finalize & fused-experts combination test" - "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations " # noqa: E501 + "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations " "--pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts" ) ) diff --git a/tests/kernels/moe/test_moe.py 
b/tests/kernels/moe/test_moe.py index 8dcefa9f7a..9354e81987 100644 --- a/tests/kernels/moe/test_moe.py +++ b/tests/kernels/moe/test_moe.py @@ -483,8 +483,8 @@ def test_mixtral_moe( } if use_rocm_aiter: - # The values of rtol and atol are set based on the tests in ROCM AITER package. # noqa: E501 - # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174 # noqa: E501 + # The values of rtol and atol are set based on the tests in ROCM AITER package. + # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174 torch.testing.assert_close( hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100 ) diff --git a/tests/kernels/moe/test_mxfp4_moe.py b/tests/kernels/moe/test_mxfp4_moe.py index d53f851e01..83241c0008 100644 --- a/tests/kernels/moe/test_mxfp4_moe.py +++ b/tests/kernels/moe/test_mxfp4_moe.py @@ -10,11 +10,11 @@ import pytest import torch from packaging import version -from vllm.model_executor.layers.quantization.quark.quark import ( # noqa: E501 +from vllm.model_executor.layers.quantization.quark.quark import ( QuarkLinearMethod, QuarkW4A4MXFP4, ) -from vllm.model_executor.layers.quantization.quark.quark_moe import ( # noqa: E501 +from vllm.model_executor.layers.quantization.quark.quark_moe import ( QuarkW4A4MXFp4MoEMethod, ) from vllm.platforms import current_platform diff --git a/tests/lora/test_chatglm3_tp.py b/tests/lora/test_chatglm3_tp.py index 2ae651a2cf..d8058c5f87 100644 --- a/tests/lora/test_chatglm3_tp.py +++ b/tests/lora/test_chatglm3_tp.py @@ -12,7 +12,7 @@ PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example EXPECTED_LORA_OUTPUT = [ "SELECT count(*) FROM singer", - "SELECT avg(age) , min(age) , max(age) FROM singer WHERE country = 'France'", # noqa: E501 + "SELECT avg(age) , min(age) , max(age) FROM singer WHERE country = 'France'", "SELECT name , country , age FROM singer ORDER BY age", ] @@ -21,10 +21,16 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]: prompts = [ PROMPT_TEMPLATE.format(query="How many singers do we have?"), PROMPT_TEMPLATE.format( - query="What is the average, minimum, and maximum age of all singers from France?" # noqa: E501 + query=( + "What is the average, minimum, and maximum " + "age of all singers from France?" + ) ), PROMPT_TEMPLATE.format( - query="Show name, country, age for all singers ordered by age from the oldest to the youngest." # noqa: E501 + query=( + "Show name, country, age for all singers ordered " + "by age from the oldest to the youngest." 
+ ) ), ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=32) diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py index 1aea3986d5..0d9431bd7a 100644 --- a/tests/lora/test_llama_tp.py +++ b/tests/lora/test_llama_tp.py @@ -15,10 +15,10 @@ MODEL_PATH = "meta-llama/Llama-2-7b-hf" EXPECTED_LORA_OUTPUT = [ " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", # noqa: E501 - " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", # noqa: E501 + " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", " SELECT one_mora FROM table_name_95 WHERE gloss = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] AND accented_mora = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] ", # noqa: E501 " SELECT sex FROM people WHERE people_id IN (SELECT people_id FROM candidate GROUP BY sex ORDER BY COUNT(people_id) DESC LIMIT 1) ", # noqa: E501 - " SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", # noqa: E501 + " SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", " SELECT womens_doubles FROM table_28138035_4 WHERE mens_singles = 'Werner Schlager' ", # noqa: E501 ] diff --git a/tests/lora/test_llm_with_multi_loras.py b/tests/lora/test_llm_with_multi_loras.py index 12855f108f..269a1ade77 100644 --- a/tests/lora/test_llm_with_multi_loras.py +++ b/tests/lora/test_llm_with_multi_loras.py @@ -26,7 +26,7 @@ LORA_RANK = 8 LORA_TEST_PROMPTS = ["What is GitHub?", "Hi, tell me about you"] LORA_TEST_EXPECTED = [ "GitHub is an open-source platform that provides a way to manage and develop software projects. It allows developers to store and manage code, collaborate on projects, and automate tasks.", # noqa: E501 - "I am Alice, an AI assistant developed by GitHub/Charent.", # noqa: E501 + "I am Alice, an AI assistant developed by GitHub/Charent.", ] diff --git a/tests/models/language/generation/test_gemma.py b/tests/models/language/generation/test_gemma.py index 53bff5ed86..246b893be3 100644 --- a/tests/models/language/generation/test_gemma.py +++ b/tests/models/language/generation/test_gemma.py @@ -16,7 +16,7 @@ def test_dummy_loader(vllm_runner, monkeypatch, model: str) -> None: ) as llm: if model == "google/gemma-3-4b-it": normalizers = llm.llm.collective_rpc( - lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item() + lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item() # noqa: E501 ) config = llm.llm.llm_engine.model_config.hf_config.text_config else: diff --git a/tests/models/language/generation/test_mistral.py b/tests/models/language/generation/test_mistral.py index ece2bed4f5..0ae83ec160 100644 --- a/tests/models/language/generation/test_mistral.py +++ b/tests/models/language/generation/test_mistral.py @@ -46,12 +46,13 @@ TOOLS = [ "properties": { "city": { "type": "string", - "description": "The city to find the weather for, e.g. 'San Francisco'", + "description": "The city to find the weather for, e.g. " + "'San Francisco'", }, "state": { "type": "string", - "description": "the two-letter abbreviation for the state that the city is" - " in, e.g. 'CA' which would mean 'California'", + "description": "the two-letter abbreviation for the state that " + "the city is in, e.g. 'CA' which would mean 'California'", }, "unit": { "type": "string", @@ -85,7 +86,8 @@ MSGS = [ {"role": "system", "content": "You are an assistant."}, { "role": "user", - "content": "Could you please rewrite the below article? 
\n\n My English needs improvving, maybe I make errors.", # noqa + "content": "Could you please rewrite the below article? \n\n My English needs " + "improvving, maybe I make errors.", }, { "role": "assistant", @@ -96,14 +98,16 @@ MSGS = [ "type": "function", "function": { "name": "rewrite", - "arguments": '{"text":"My English needs improvving, maybe I make errors."}', # noqa + "arguments": '{"text":"My English needs improvving, maybe ' + 'I make errors."}', }, } ], }, { "role": "tool", - "content": '{"action":"rewrite","outcome":"My English needs improving, maybe I make errors."}', # noqa + "content": '{"action":"rewrite","outcome":"My English needs improving, maybe ' + 'I make errors."}', "tool_call_id": "bbc5b7ede", "name": "rewrite", }, diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 9168778a16..656a6d3bd7 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -130,14 +130,14 @@ VLM_TEST_SETTINGS = { dtype="bfloat16", marks=[ pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask") - ], # noqa: E501 + ], ), "qwen2_5_vl": VLMTestInfo( models=["Qwen/Qwen2.5-VL-3B-Instruct"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO), prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", + video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -149,8 +149,8 @@ VLM_TEST_SETTINGS = { models=["Qwen/Qwen2.5-Omni-3B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO), prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", + video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", max_model_len=4096, max_num_seqs=2, num_logprobs=6 if current_platform.is_cpu() else 5, @@ -181,7 +181,7 @@ VLM_TEST_SETTINGS = { max_model_len=16384, hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs( "llava-hf/llava-onevision-qwen2-0.5b-ov-hf" - ), # noqa: E501 + ), auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, image_size_factors=[(0.25, 0.5, 1.0)], @@ -213,7 +213,7 @@ VLM_TEST_SETTINGS = { models=["Qwen/Qwen2.5-VL-3B-Instruct"], test_type=VLMTestType.IMAGE, prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -237,10 +237,10 @@ VLM_TEST_SETTINGS = { single_image_prompts=IMAGE_ASSETS.prompts( { "stop_sign": "Please describe the image shortly.", - "cherry_blossom": "Please infer the season with reason.", 
# noqa: E501 + "cherry_blossom": "Please infer the season with reason.", } ), - multi_image_prompt="Describe the two images shortly.", # noqa: E501 + multi_image_prompt="Describe the two images shortly.", stop_str=["<|im_end|>"], image_size_factors=[(0.10, 0.15)], max_tokens=64, @@ -252,11 +252,11 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "What's the content in the center of the image?", # noqa: E501 - "cherry_blossom": "What is the season?", # noqa: E501 + "stop_sign": "What's the content in the center of the image?", + "cherry_blossom": "What is the season?", } ), - multi_image_prompt="Describe the two images in detail.", # noqa: E501 + multi_image_prompt="Describe the two images in detail.", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -268,11 +268,11 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "What's the content in the center of the image?", # noqa: E501 - "cherry_blossom": "What is the season?", # noqa: E501 + "stop_sign": "What's the content in the center of the image?", + "cherry_blossom": "What is the season?", } ), - multi_image_prompt="Describe the two images in detail.", # noqa: E501 + multi_image_prompt="Describe the two images in detail.", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -311,14 +311,14 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "\nWhat's the content in the center of the image?", # noqa: E501 + "stop_sign": "\nWhat's the content in the center of the image?", "cherry_blossom": "\nPlease infer the season with reason in details.", # noqa: E501 } ), multi_image_prompt="image_1:\nimage_2:\nWhich image can we see the car and the tower?", # noqa: E501 patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner, hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output, - stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501 + stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)], ), "fuyu": VLMTestInfo( @@ -342,7 +342,7 @@ VLM_TEST_SETTINGS = { single_image_prompts=IMAGE_ASSETS.prompts( { "stop_sign": "What's the content in the center of the image?", # noqa: E501 - "cherry_blossom": "What is the season?", # noqa: E501 + "cherry_blossom": "What is the season?", } ), multi_image_prompt="Describe the two images in detail.", # noqa: E501 @@ -356,7 +356,7 @@ VLM_TEST_SETTINGS = { "glm4v": VLMTestInfo( models=["zai-org/glm-4v-9b"], test_type=VLMTestType.IMAGE, - prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501 + prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", single_image_prompts=IMAGE_ASSETS.prompts( { "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?", # noqa: E501 @@ -377,9 +377,9 @@ VLM_TEST_SETTINGS = { "glm4_1v": VLMTestInfo( models=["zai-org/GLM-4.1V-9B-Thinking"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), - prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501 - img_idx_to_prompt=lambda 
idx: "<|begin_of_image|><|image|><|end_of_image|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>", # noqa: E501 + prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", + img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>", + video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>", max_model_len=2048, max_num_seqs=2, get_stop_token_ids=lambda tok: [151329, 151336, 151338], @@ -410,10 +410,10 @@ VLM_TEST_SETTINGS = { "h2oai/h2ovl-mississippi-2b", ], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), - prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>", # noqa: E501 + prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>", single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "\nWhat's the content in the center of the image?", # noqa: E501 + "stop_sign": "\nWhat's the content in the center of the image?", "cherry_blossom": "\nWhat is the season?", } ), @@ -444,7 +444,7 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "\nWhat's the content in the center of the image?", # noqa: E501 + "stop_sign": "\nWhat's the content in the center of the image?", "cherry_blossom": "\nWhat is the season?", } ), @@ -529,7 +529,7 @@ VLM_TEST_SETTINGS = { max_model_len=16384, hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs( "llava-hf/llava-onevision-qwen2-0.5b-ov-hf" - ), # noqa: E501 + ), auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, custom_test_opts=[ @@ -583,7 +583,7 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids( ["<|im_end|>", "<|endoftext|>"] - ), # noqa: E501 + ), hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner, # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49 @@ -598,7 +598,7 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids( ["<|im_end|>", "<|endoftext|>"] - ), # noqa: E501 + ), hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner, ), @@ -627,7 +627,7 @@ VLM_TEST_SETTINGS = { models=["AIDC-AI/Ovis1.6-Gemma2-9B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), prompt_formatter=lambda img_prompt: f"user\n{img_prompt}\nmodel\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", max_model_len=4096, max_num_seqs=2, dtype="half", @@ -640,7 +640,7 @@ VLM_TEST_SETTINGS = { models=["AIDC-AI/Ovis2-1B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", max_model_len=4096, max_num_seqs=2, dtype="half", @@ -652,7 +652,7 @@ VLM_TEST_SETTINGS = { models=["AIDC-AI/Ovis2.5-2B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO), prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - 
img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", video_idx_to_prompt=lambda idx: "