diff --git a/benchmarks/benchmark_ngram_proposer.py b/benchmarks/benchmark_ngram_proposer.py
index 291d87d608..626b150ee4 100644
--- a/benchmarks/benchmark_ngram_proposer.py
+++ b/benchmarks/benchmark_ngram_proposer.py
@@ -164,7 +164,7 @@ def invoke_main() -> None:
     )
     parser.add_argument(
         "--batched", action="store_true", help="consider time to prepare batch"
-    )  # noqa: E501
+    )
     parser.add_argument(
         "--num-iteration",
         type=int,
diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py
index f6b48ad524..58b9767d09 100644
--- a/benchmarks/benchmark_serving_structured_output.py
+++ b/benchmarks/benchmark_serving_structured_output.py
@@ -909,13 +909,13 @@ def create_argument_parser():
     parser.add_argument(
         "--tokenizer",
         type=str,
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--tokenizer-mode",
         type=str,
         default="auto",
-        help="Name or path of the tokenizer, if not using the default tokenizer.",  # noqa: E501
+        help="Name or path of the tokenizer, if not using the default tokenizer.",
     )
     parser.add_argument(
         "--num-prompts",
diff --git a/csrc/cutlass_extensions/vllm_cutlass_library_extension.py b/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
index 7a81dd40c8..5e742d0b02 100644
--- a/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
+++ b/csrc/cutlass_extensions/vllm_cutlass_library_extension.py
@@ -72,8 +72,8 @@ VLLMKernelScheduleTag: dict[
 ] = {
     **KernelScheduleTag,  # type: ignore
     **{
-        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",
-        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",
-        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",
+        MixedInputKernelScheduleType.TmaWarpSpecialized: "cutlass::gemm::KernelTmaWarpSpecialized",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedPingpong: "cutlass::gemm::KernelTmaWarpSpecializedPingpong",  # noqa: E501
+        MixedInputKernelScheduleType.TmaWarpSpecializedCooperative: "cutlass::gemm::KernelTmaWarpSpecializedCooperative",  # noqa: E501
     },
 }
diff --git a/examples/offline_inference/vision_language_pooling.py b/examples/offline_inference/vision_language_pooling.py
index 6f8679918c..33ffb59014 100644
--- a/examples/offline_inference/vision_language_pooling.py
+++ b/examples/offline_inference/vision_language_pooling.py
@@ -113,7 +113,7 @@ def run_e5_v(query: Query) -> ModelRequestData:
 def _get_vlm2vec_prompt_image(query: Query, image_token: str):
     if query["modality"] == "text":
         text = query["text"]
-        prompt = f"Find me an everyday image that matches the given caption: {text}"  # noqa: E501
+        prompt = f"Find me an everyday image that matches the given caption: {text}"
         image = None
     elif query["modality"] == "image":
         prompt = f"{image_token} Find a day-to-day image that looks similar to the provided image."  # noqa: E501
diff --git a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
index d39edb0b9d..1df11d9d84 100644
--- a/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
+++ b/examples/online_serving/disaggregated_serving/disagg_proxy_demo.py
@@ -203,9 +203,9 @@ class Proxy:
                 async with session.post(
                     url=url, json=data, headers=headers
                 ) as response:
-                    if 200 <= response.status < 300 or 400 <= response.status < 500:  # noqa: E501
+                    if 200 <= response.status < 300 or 400 <= response.status < 500:
                         if use_chunked:
-                            async for chunk_bytes in response.content.iter_chunked(  # noqa: E501
+                            async for chunk_bytes in response.content.iter_chunked(
                                 1024
                             ):
                                 yield chunk_bytes
diff --git a/pyproject.toml b/pyproject.toml
index b3cae3d00c..704f28fa65 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,52 +56,6 @@ include = ["vllm*"]
 "vllm/third_party/**" = ["ALL"]
 "vllm/version.py" = ["F401"]
 "vllm/_version.py" = ["ALL"]
-# TEMPORARY! These ignores will be fixed forward
-## Line length violations
-"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
-"tests/compile/piecewise/test_simple.py" = ["E501"]
-"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
-"tests/entrypoints/conftest.py" = ["E501"]
-"tests/entrypoints/openai/test_audio.py" = ["E501"]
-"tests/entrypoints/openai/test_chat.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
-"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
-"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
-"tests/entrypoints/openai/test_video.py" = ["E501"]
-"tests/entrypoints/openai/test_vision.py" = ["E501"]
-"tests/entrypoints/test_chat_utils.py" = ["E501"]
-"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
-"tests/models/language/generation/test_gemma.py" = ["E501"]
-"tests/models/language/generation/test_mistral.py" = ["E501"]
-"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
-"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
-"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
-"tests/tool_use/test_tool_choice_required.py" = ["E501"]
-"tests/v1/attention/utils.py" = ["E501"]
-"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
-"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
-"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
-"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
-"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
-"vllm/compilation/collective_fusion.py" = ["E501"]
-"vllm/compilation/wrapper.py" = ["E501"]
-"vllm/config/vllm.py" = ["E501"]
-"vllm/distributed/device_communicators/all2all.py" = ["E501"]
-"vllm/entrypoints/openai/protocol.py" = ["E501"]
-"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
-"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
-"vllm/model_executor/models/bailing_moe.py" = ["E501"]
-"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
-"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
-"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
-"vllm/model_executor/models/phi4mm.py" = ["E501"]
-"vllm/model_executor/models/qwen3_next.py" = ["E501"]
-"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
-"vllm/v1/attention/backends/mla/common.py" = ["E501"]
-"vllm/v1/engine/utils.py" = ["E501"]
-"vllm/v1/utils.py" = ["E501"]
-"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
-# End of temporary ignores
 
 [tool.ruff.lint]
 select = [
diff --git a/tests/compile/piecewise/test_simple.py b/tests/compile/piecewise/test_simple.py
index ea80c43c9d..920cd5a06c 100644
--- a/tests/compile/piecewise/test_simple.py
+++ b/tests/compile/piecewise/test_simple.py
@@ -132,10 +132,14 @@ def test_simple_piecewise_compile(use_inductor):
         splitting_ops=["silly.attention"],
         use_inductor_graph_partition=False,
         use_inductor=use_inductor,
-        expected_num_piecewise_graphs_seen=5,  # 2 * num_layers + 1
-        expected_num_piecewise_capturable_graphs_seen=3,  # 1 + num_layers
-        expected_num_backend_compilations=3,  # num_piecewise_capturable_graphs_seen
-        expected_num_cudagraph_captured=6,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # 2 * num_layers + 1
+        expected_num_piecewise_graphs_seen=5,
+        # 1 + num_layers
+        expected_num_piecewise_capturable_graphs_seen=3,
+        # num_piecewise_capturable_graphs_seen
+        expected_num_backend_compilations=3,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        expected_num_cudagraph_captured=6,
     )
@@ -147,14 +151,16 @@ def test_simple_inductor_graph_partition(splitting_ops):
         pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
     _run_simple_model(
-        # inductor graph partition automatically resets splitting_ops
-        # to be an empty list
+        # Inductor graph partition automatically resets splitting_ops to an empty list
         splitting_ops=splitting_ops,
         use_inductor_graph_partition=True,
         use_inductor=True,
-        expected_num_piecewise_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_piecewise_capturable_graphs_seen=1,  # since not splitting at fx graph level
-        expected_num_backend_compilations=1,  # since not splitting at fx graph level
-        expected_num_cudagraph_captured=6,  # inductor graph partition still captures 6
-        # graph, same as fx graph partition.
+        # Since not splitting at fx graph level
+        expected_num_piecewise_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_piecewise_capturable_graphs_seen=1,
+        # Since not splitting at fx graph level
+        expected_num_backend_compilations=1,
+        # Inductor graph partition still captures 6 graphs, same as fx graph partition
+        expected_num_cudagraph_captured=6,
     )
diff --git a/tests/compile/piecewise/test_toy_llama.py b/tests/compile/piecewise/test_toy_llama.py
index 46b9e4bf8d..e053367fb3 100644
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
@@ -367,11 +367,14 @@ def test_toy_llama(use_inductor: bool):
         kwargs = {"num_eager_compiles": 1, "num_inductor_compiles": 0}
     with compilation_counter.expect(
-        num_graphs_seen=1,  # one graph for the model
+        # One graph for the model
+        num_graphs_seen=1,
         num_piecewise_graphs_seen=1,
         num_piecewise_capturable_graphs_seen=1,
-        num_backend_compilations=1,  # num_piecewise_capturable_graphs_seen
-        num_cudagraph_captured=2,  # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        # num_piecewise_capturable_graphs_seen
+        num_backend_compilations=1,
+        # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen
+        num_cudagraph_captured=2,
         **kwargs,
     ):
         outputs.append(
@@ -478,9 +481,10 @@ def benchmark():
             # it is fine here, because we only use the lambda function once.
runtime = do_bench( lambda: graphs[b][0]( # noqa - input_ids[:b], positions[:b] + input_ids[:b], # noqa + positions[:b], # noqa ) - ) # noqa + ) piecewise_cudagraph_time[b] = runtime else: runtime = do_bench(lambda: graphs[b][0].replay()) # noqa diff --git a/tests/compile/test_functionalization.py b/tests/compile/test_functionalization.py index 95e92a8780..ae17bc67b1 100644 --- a/tests/compile/test_functionalization.py +++ b/tests/compile/test_functionalization.py @@ -243,7 +243,7 @@ def test_fix_functionalization(model_class: torch.nn.Module, do_fusion: bool): # check if the functionalization pass is applied for op in model.ops_in_model(do_fusion): find_auto_fn(backend_no_func.graph_post_pass.nodes, op) - assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # noqa: E501 + assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # make sure the ops were all de-functionalized found = dict() diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index 25b3f16bd9..1fd5c26765 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -565,7 +565,7 @@ def test_attention_quant_pattern( elif quant_key.dtype == FP4_DTYPE: assert attn_nodes_post[0].kwargs.get("output_block_scale") is not None, ( "Attention should have output_block_scale after FP4 fusion" - ) # noqa: E501 + ) # Check that results are close torch.testing.assert_close(result_unfused, result_fused_1, atol=1e-2, rtol=1e-2) diff --git a/tests/compile/test_sequence_parallelism.py b/tests/compile/test_sequence_parallelism.py index 1d14a89c3a..afb31cb95b 100644 --- a/tests/compile/test_sequence_parallelism.py +++ b/tests/compile/test_sequence_parallelism.py @@ -186,7 +186,7 @@ class TestQuantModel(torch.nn.Module): ): # If fusion happens, the fused op is the one # we check for (de)functionalization - return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default] # noqa: E501 + return [torch.ops._C.fused_add_rms_norm_static_fp8_quant.default] else: # If no fusion, the original ops are checked return [ @@ -322,7 +322,7 @@ def sequence_parallelism_pass_on_test_model( # check if the functionalization pass is applied for op in model.ops_in_model(): find_auto_fn(backend_no_func.graph_post_pass.nodes, op) - assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # noqa: E501 + assert find_auto_fn_maybe(backend_func.graph_post_pass.nodes, op) is None # make sure the ops were all de-functionalized found = dict() diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index 119e8e7621..7d55c40754 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -104,7 +104,7 @@ TEXT_GENERATION_MODELS = { # [Decoder-only] # Uses Llama # "BAAI/AquilaChat-7B": PPTestSettings.fast(), - "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"), # noqa: E501 + "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(load_format="dummy"), "baichuan-inc/Baichuan-7B": PPTestSettings.fast(), "baichuan-inc/Baichuan2-13B-Chat": PPTestSettings.fast(), "bigscience/bloomz-1b1": PPTestSettings.fast(), @@ -138,7 +138,7 @@ TEXT_GENERATION_MODELS = { # Uses Llama # "mistralai/Mistral-7B-Instruct-v0.1": PPTestSettings.fast(), "state-spaces/mamba-130m-hf": PPTestSettings.fast(), - "mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(load_format="dummy"), # noqa: E501 + "mistralai/Mixtral-8x7B-Instruct-v0.1": 
PPTestSettings.fast(load_format="dummy"), "mosaicml/mpt-7b": PPTestSettings.fast(), "nvidia/Minitron-8B-Base": PPTestSettings.fast(), "allenai/OLMo-1B-hf": PPTestSettings.fast(), @@ -151,13 +151,13 @@ TEXT_GENERATION_MODELS = { "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(), "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed( multi_node_only=True, load_format="dummy" - ), # noqa: E501 + ), "Qwen/Qwen-7B-Chat": PPTestSettings.fast(), "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(), "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(), "stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(), "bigcode/starcoder2-3b": PPTestSettings.fast(), - "upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"), # noqa: E501 + "upstage/solar-pro-preview-instruct": PPTestSettings.fast(load_format="dummy"), # FIXME: Cannot load tokenizer in latest transformers version. # Need to use tokenizer from `meta-llama/Llama-2-7b-chat-hf` # "xverse/XVERSE-7B-Chat": PPTestSettings.fast(), diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py index e03d34ac8f..a52e1cb7df 100644 --- a/tests/entrypoints/conftest.py +++ b/tests/entrypoints/conftest.py @@ -83,7 +83,8 @@ def sample_complex_json_schema(): "type": "array", "items": { "type": "string", - "pattern": "^[a-z]{1,10}$", # Combining length and pattern restrictions + # Combining length and pattern restrictions + "pattern": "^[a-z]{1,10}$", }, }, }, diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index f3e89f48f9..a96f0134c2 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -145,7 +145,7 @@ async def test_single_chat_session_audio_base64encoded( { "type": "audio_url", "audio_url": { - "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}" + "url": f"data:audio/wav;base64,{base64_encoded_audio[audio_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's happening in this audio?"}, diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index e1d5971052..ed0b284bda 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -835,17 +835,18 @@ async def test_extra_fields_allowed(client: openai.AsyncOpenAI): @pytest.mark.asyncio async def test_complex_message_content(client: openai.AsyncOpenAI): + content = [ + { + "type": "text", + "text": "what is 1+1? please provide the result without any other text.", + } + ] resp = await client.chat.completions.create( model=MODEL_NAME, messages=[ { "role": "user", - "content": [ - { - "type": "text", - "text": "what is 1+1? 
please provide the result without any other text.", - } - ], + "content": content, } ], temperature=0, diff --git a/tests/entrypoints/openai/test_chat_template.py b/tests/entrypoints/openai/test_chat_template.py index adaeb96fbf..d1202a5975 100644 --- a/tests/entrypoints/openai/test_chat_template.py +++ b/tests/entrypoints/openai/test_chat_template.py @@ -76,8 +76,8 @@ def test_load_chat_template(): assert ( template_content == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %} -{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" - ) # noqa: E501 +{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" # noqa: E501 + ) def test_no_load_chat_template_filelike(): diff --git a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py index 4f23eee462..e452b578ba 100644 --- a/tests/entrypoints/openai/test_chat_with_tool_reasoning.py +++ b/tests/entrypoints/openai/test_chat_with_tool_reasoning.py @@ -45,12 +45,13 @@ TOOLS = [ "properties": { "city": { "type": "string", - "description": "The city to find the weather for, e.g. 'San Francisco'", + "description": "The city to find the weather for, e.g. " + "'San Francisco'", }, "state": { "type": "string", - "description": "the two-letter abbreviation for the state that the city is" - " in, e.g. 'CA' which would mean 'California'", + "description": "the two-letter abbreviation for the state that " + "the city is in, e.g. 'CA' which would mean 'California'", }, "unit": { "type": "string", @@ -69,7 +70,8 @@ MESSAGES = [ {"role": "assistant", "content": "I'm doing well! How can I help you?"}, { "role": "user", - "content": "Can you tell me what the temperate will be in Dallas, in fahrenheit?", + "content": "Can you tell me what the temperate will be in Dallas, " + "in fahrenheit?", }, ] diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py index 599d65187e..e64f68cad7 100644 --- a/tests/entrypoints/openai/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/test_completion_with_function_calling.py @@ -25,12 +25,14 @@ tools = [ "properties": { "city": { "type": "string", - "description": "The city to find the weather for, e.g. 'Vienna'", + "description": "The city to find the weather for, e.g. " + "'Vienna'", "default": "Vienna", }, "country": { "type": "string", - "description": "The country that the city is in, e.g. 'Austria'", + "description": "The country that the city is in, e.g. " + "'Austria'", }, "unit": { "type": "string", @@ -85,12 +87,14 @@ tools = [ "properties": { "city": { "type": "string", - "description": "The city to get the forecast for, e.g. 'Vienna'", + "description": "The city to get the forecast for, e.g. " + "'Vienna'", "default": "Vienna", }, "country": { "type": "string", - "description": "The country that the city is in, e.g. 'Austria'", + "description": "The country that the city is in, e.g. 
" + "'Austria'", }, "days": { "type": "integer", diff --git a/tests/entrypoints/openai/test_video.py b/tests/entrypoints/openai/test_video.py index a0d118743e..4c7d1c14ca 100644 --- a/tests/entrypoints/openai/test_video.py +++ b/tests/entrypoints/openai/test_video.py @@ -179,7 +179,7 @@ async def test_single_chat_session_video_base64encoded( { "type": "video_url", "video_url": { - "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" + "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's in this video?"}, @@ -238,7 +238,7 @@ async def test_single_chat_session_video_base64encoded_beamsearch( { "type": "video_url", "video_url": { - "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" + "url": f"data:video/jpeg;base64,{base64_encoded_video[video_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's in this video?"}, diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index cab23d1e2f..5a15a352f4 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -233,7 +233,7 @@ async def test_single_chat_session_image_base64encoded( { "type": "image_url", "image_url": { - "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" + "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" # noqa: E501 }, }, {"type": "text", "text": content_text}, @@ -300,7 +300,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch( { "type": "image_url", "image_url": { - "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" + "url": f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}" # noqa: E501 }, }, {"type": "text", "text": "What's in this image?"}, diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 975ca53a3a..6e92419c4f 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -947,7 +947,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt( {"type": "image_url", "image_url": {"url": image_url}}, { "type": "text", - "text": "What's in <|image_1|> and how does it compare to the other one?", # noqa: E501 + "text": "What's in <|image_1|> and how does it compare to " + "the other one?", }, ], } @@ -960,8 +961,8 @@ def test_parse_chat_messages_placeholder_one_already_in_prompt( assert conversation == [ { "role": "user", - "content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to the " - "other one?", + "content": "<|image_2|>\nWhat's in <|image_1|> and how does it compare to " + "the other one?", } ] _assert_mm_data_is_image_input(mm_data, 2) @@ -1364,7 +1365,7 @@ def test_parse_chat_messages_multiple_images_multiple_messages_interleave( _assert_mm_uuids(mm_uuids, 2, expected_uuids=[None, None]) -def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave( # noqa: E501 +def test_parse_chat_messages_multiple_images_with_uuids_multiple_messages_interleave( phi3v_model_config_mm_interleaved, phi3v_tokenizer, image_url, @@ -1451,14 +1452,14 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave( assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this 
audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -1468,7 +1469,7 @@ def test_parse_chat_messages_multiple_modals_multiple_messages_interleave( _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[None]) -def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave( # noqa: E501 +def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interleave( qwen25omni_model_config_mm_interleaved, qwen25omni_tokenizer, image_url, @@ -1521,14 +1522,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_messages_interl assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -1593,14 +1594,14 @@ def test_parse_chat_messages_multiple_modals_with_uuids_multiple_empty_media_mes assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -1661,14 +1662,14 @@ def test_parse_chat_messages_multiple_modals_with_partial_uuids_multiple_message assert conversation == [ { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "Now listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", # noqa: E501 + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nNow listen to this audio\nAudio 1: <|audio_bos|><|AUDIO|><|audio_eos|>", }, {"role": "assistant", "content": "Some stuff."}, { "role": "user", - "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>\n" - "And what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", + "content": "What's on this image?\n<|vision_start|><|IMAGE|><|vision_end|>" + "\nAnd what's in the video?\n<|vision_start|><|VIDEO|><|vision_end|>", }, ] @@ -2193,7 +2194,8 @@ def test_parse_chat_messages_single_empty_audio_with_uuid( assert conversation == [ { "role": "user", - "content": "Audio 
1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?", + "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the " + "audio say?", } ] _assert_mm_data_inputs(mm_data, {"audio": 1}) @@ -2228,7 +2230,8 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async( assert conversation == [ { "role": "user", - "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?", + "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the " + "audio say?", } ] _assert_mm_data_inputs(await mm_future, {"audio": 1}) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 6037424bde..6b99ba7af5 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -165,7 +165,7 @@ def test_env( # FlashMLA only supports block_size == 64 pytest.skip("FlashMLA only supports block_size 64") else: - from vllm.v1.attention.backends.mla.flashmla import ( # noqa: E501 + from vllm.v1.attention.backends.mla.flashmla import ( is_flashmla_supported, ) diff --git a/tests/kernels/moe/modular_kernel_tools/common.py b/tests/kernels/moe/modular_kernel_tools/common.py index 903f13e444..091fa4fafe 100644 --- a/tests/kernels/moe/modular_kernel_tools/common.py +++ b/tests/kernels/moe/modular_kernel_tools/common.py @@ -331,7 +331,8 @@ class WeightTensors: in_dtype=config.dtype, quant_dtype=config.quant_dtype, block_shape=config.quant_block_shape, - per_out_ch_quant=config.is_per_act_token_quant, # or config.is_per_out_ch_quant + # or config.is_per_out_ch_quant + per_out_ch_quant=config.is_per_act_token_quant, ) return WeightTensors( w1=w1, w2=w2, w1_scale=w1_scale, w2_scale=w2_scale, w1_gs=w1_gs, w2_gs=w2_gs diff --git a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py index 9040db017f..0ef306051c 100644 --- a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py +++ b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py @@ -124,7 +124,7 @@ def make_feature_matrix(csv_file_path: str): results_df: Optional[pd.DataFrame] = None for m, k, n, e, topks, dtype, pf_type, experts_type, quant_config in tqdm( combinations - ): # noqa: E501 + ): config = Config( Ms=[m], K=k, diff --git a/tests/kernels/moe/modular_kernel_tools/mk_objects.py b/tests/kernels/moe/modular_kernel_tools/mk_objects.py index d4b42ed8ea..566fb1e09d 100644 --- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py +++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py @@ -10,7 +10,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( BatchedDeepGemmExperts, ) -from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( # noqa: E501 +from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( BatchedTritonOrDeepGemmExperts, ) from vllm.model_executor.layers.fused_moe.config import ( @@ -196,10 +196,10 @@ register_experts( # Disable on blackwell for now if has_deep_ep() and not current_platform.has_device_capability(100): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from 
vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) @@ -233,7 +233,7 @@ if has_pplx(): ) if has_flashinfer_cutlass_fused_moe() and current_platform.has_device_capability(100): - from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( FlashInferExperts, ) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 diff --git a/tests/kernels/moe/parallel_utils.py b/tests/kernels/moe/parallel_utils.py index 9d087ad13b..fb9e5df281 100644 --- a/tests/kernels/moe/parallel_utils.py +++ b/tests/kernels/moe/parallel_utils.py @@ -17,10 +17,10 @@ from typing_extensions import Concatenate, ParamSpec from vllm.utils import get_open_port, has_deep_ep if has_deep_ep(): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) diff --git a/tests/kernels/moe/test_deepep_deepgemm_moe.py b/tests/kernels/moe/test_deepep_deepgemm_moe.py index cfa752d6ea..e68c5bfa59 100644 --- a/tests/kernels/moe/test_deepep_deepgemm_moe.py +++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py @@ -30,10 +30,10 @@ from .parallel_utils import ProcessGroupInfo, parallel_launch from .utils import make_test_weights if has_deep_ep(): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) diff --git a/tests/kernels/moe/test_deepep_moe.py b/tests/kernels/moe/test_deepep_moe.py index f98e4407a0..a1dabea1f0 100644 --- a/tests/kernels/moe/test_deepep_moe.py +++ b/tests/kernels/moe/test_deepep_moe.py @@ -28,10 +28,10 @@ from ...utils import multi_gpu_test from .parallel_utils import ProcessGroupInfo, parallel_launch if has_deep_ep(): - from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( DeepEPHTPrepareAndFinalize, ) - from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( # noqa: E501 + from vllm.model_executor.layers.fused_moe.deepep_ll_prepare_finalize import ( DeepEPLLPrepareAndFinalize, ) diff --git a/tests/kernels/moe/test_modular_kernel_combinations.py b/tests/kernels/moe/test_modular_kernel_combinations.py index fdd46d7e1b..9c41145235 100644 --- a/tests/kernels/moe/test_modular_kernel_combinations.py +++ b/tests/kernels/moe/test_modular_kernel_combinations.py @@ -271,7 +271,7 @@ if __name__ == "__main__": parser = make_config_arg_parser( description=( "Run single prepare-finalize & fused-experts combination test" - "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations " # noqa: E501 + "Example : python3 -m tests.kernels.moe.test_modular_kernel_combinations " "--pf-type PplxPrepareAndFinalize --experts-type BatchedTritonExperts" ) ) diff --git a/tests/kernels/moe/test_moe.py 
b/tests/kernels/moe/test_moe.py index 8dcefa9f7a..9354e81987 100644 --- a/tests/kernels/moe/test_moe.py +++ b/tests/kernels/moe/test_moe.py @@ -483,8 +483,8 @@ def test_mixtral_moe( } if use_rocm_aiter: - # The values of rtol and atol are set based on the tests in ROCM AITER package. # noqa: E501 - # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174 # noqa: E501 + # The values of rtol and atol are set based on the tests in ROCM AITER package. + # https://github.com/ROCm/aiter/blob/dfed377f4be7da96ca2d75ac0761f569676f7240/op_tests/test_moe.py#L174 torch.testing.assert_close( hf_states.flatten(0, 1), vllm_states, rtol=0.01, atol=100 ) diff --git a/tests/kernels/moe/test_mxfp4_moe.py b/tests/kernels/moe/test_mxfp4_moe.py index d53f851e01..83241c0008 100644 --- a/tests/kernels/moe/test_mxfp4_moe.py +++ b/tests/kernels/moe/test_mxfp4_moe.py @@ -10,11 +10,11 @@ import pytest import torch from packaging import version -from vllm.model_executor.layers.quantization.quark.quark import ( # noqa: E501 +from vllm.model_executor.layers.quantization.quark.quark import ( QuarkLinearMethod, QuarkW4A4MXFP4, ) -from vllm.model_executor.layers.quantization.quark.quark_moe import ( # noqa: E501 +from vllm.model_executor.layers.quantization.quark.quark_moe import ( QuarkW4A4MXFp4MoEMethod, ) from vllm.platforms import current_platform diff --git a/tests/lora/test_chatglm3_tp.py b/tests/lora/test_chatglm3_tp.py index 2ae651a2cf..d8058c5f87 100644 --- a/tests/lora/test_chatglm3_tp.py +++ b/tests/lora/test_chatglm3_tp.py @@ -12,7 +12,7 @@ PROMPT_TEMPLATE = """I want you to act as a SQL terminal in front of an example EXPECTED_LORA_OUTPUT = [ "SELECT count(*) FROM singer", - "SELECT avg(age) , min(age) , max(age) FROM singer WHERE country = 'France'", # noqa: E501 + "SELECT avg(age) , min(age) , max(age) FROM singer WHERE country = 'France'", "SELECT name , country , age FROM singer ORDER BY age", ] @@ -21,10 +21,16 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]: prompts = [ PROMPT_TEMPLATE.format(query="How many singers do we have?"), PROMPT_TEMPLATE.format( - query="What is the average, minimum, and maximum age of all singers from France?" # noqa: E501 + query=( + "What is the average, minimum, and maximum " + "age of all singers from France?" + ) ), PROMPT_TEMPLATE.format( - query="Show name, country, age for all singers ordered by age from the oldest to the youngest." # noqa: E501 + query=( + "Show name, country, age for all singers ordered " + "by age from the oldest to the youngest." 
+ ) ), ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=32) diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py index 1aea3986d5..0d9431bd7a 100644 --- a/tests/lora/test_llama_tp.py +++ b/tests/lora/test_llama_tp.py @@ -15,10 +15,10 @@ MODEL_PATH = "meta-llama/Llama-2-7b-hf" EXPECTED_LORA_OUTPUT = [ " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", # noqa: E501 - " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", # noqa: E501 + " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", " SELECT one_mora FROM table_name_95 WHERE gloss = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] AND accented_mora = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] ", # noqa: E501 " SELECT sex FROM people WHERE people_id IN (SELECT people_id FROM candidate GROUP BY sex ORDER BY COUNT(people_id) DESC LIMIT 1) ", # noqa: E501 - " SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", # noqa: E501 + " SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", " SELECT womens_doubles FROM table_28138035_4 WHERE mens_singles = 'Werner Schlager' ", # noqa: E501 ] diff --git a/tests/lora/test_llm_with_multi_loras.py b/tests/lora/test_llm_with_multi_loras.py index 12855f108f..269a1ade77 100644 --- a/tests/lora/test_llm_with_multi_loras.py +++ b/tests/lora/test_llm_with_multi_loras.py @@ -26,7 +26,7 @@ LORA_RANK = 8 LORA_TEST_PROMPTS = ["What is GitHub?", "Hi, tell me about you"] LORA_TEST_EXPECTED = [ "GitHub is an open-source platform that provides a way to manage and develop software projects. It allows developers to store and manage code, collaborate on projects, and automate tasks.", # noqa: E501 - "I am Alice, an AI assistant developed by GitHub/Charent.", # noqa: E501 + "I am Alice, an AI assistant developed by GitHub/Charent.", ] diff --git a/tests/models/language/generation/test_gemma.py b/tests/models/language/generation/test_gemma.py index 53bff5ed86..246b893be3 100644 --- a/tests/models/language/generation/test_gemma.py +++ b/tests/models/language/generation/test_gemma.py @@ -16,7 +16,7 @@ def test_dummy_loader(vllm_runner, monkeypatch, model: str) -> None: ) as llm: if model == "google/gemma-3-4b-it": normalizers = llm.llm.collective_rpc( - lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item() + lambda self: self.model_runner.model.language_model.model.normalizer.cpu().item() # noqa: E501 ) config = llm.llm.llm_engine.model_config.hf_config.text_config else: diff --git a/tests/models/language/generation/test_mistral.py b/tests/models/language/generation/test_mistral.py index ece2bed4f5..0ae83ec160 100644 --- a/tests/models/language/generation/test_mistral.py +++ b/tests/models/language/generation/test_mistral.py @@ -46,12 +46,13 @@ TOOLS = [ "properties": { "city": { "type": "string", - "description": "The city to find the weather for, e.g. 'San Francisco'", + "description": "The city to find the weather for, e.g. " + "'San Francisco'", }, "state": { "type": "string", - "description": "the two-letter abbreviation for the state that the city is" - " in, e.g. 'CA' which would mean 'California'", + "description": "the two-letter abbreviation for the state that " + "the city is in, e.g. 'CA' which would mean 'California'", }, "unit": { "type": "string", @@ -85,7 +86,8 @@ MSGS = [ {"role": "system", "content": "You are an assistant."}, { "role": "user", - "content": "Could you please rewrite the below article? 
\n\n My English needs improvving, maybe I make errors.", # noqa + "content": "Could you please rewrite the below article? \n\n My English needs " + "improvving, maybe I make errors.", }, { "role": "assistant", @@ -96,14 +98,16 @@ MSGS = [ "type": "function", "function": { "name": "rewrite", - "arguments": '{"text":"My English needs improvving, maybe I make errors."}', # noqa + "arguments": '{"text":"My English needs improvving, maybe ' + 'I make errors."}', }, } ], }, { "role": "tool", - "content": '{"action":"rewrite","outcome":"My English needs improving, maybe I make errors."}', # noqa + "content": '{"action":"rewrite","outcome":"My English needs improving, maybe ' + 'I make errors."}', "tool_call_id": "bbc5b7ede", "name": "rewrite", }, diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 9168778a16..656a6d3bd7 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -130,14 +130,14 @@ VLM_TEST_SETTINGS = { dtype="bfloat16", marks=[ pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask") - ], # noqa: E501 + ], ), "qwen2_5_vl": VLMTestInfo( models=["Qwen/Qwen2.5-VL-3B-Instruct"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO), prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", + video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -149,8 +149,8 @@ VLM_TEST_SETTINGS = { models=["Qwen/Qwen2.5-Omni-3B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO), prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>", + video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>", max_model_len=4096, max_num_seqs=2, num_logprobs=6 if current_platform.is_cpu() else 5, @@ -181,7 +181,7 @@ VLM_TEST_SETTINGS = { max_model_len=16384, hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs( "llava-hf/llava-onevision-qwen2-0.5b-ov-hf" - ), # noqa: E501 + ), auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, image_size_factors=[(0.25, 0.5, 1.0)], @@ -213,7 +213,7 @@ VLM_TEST_SETTINGS = { models=["Qwen/Qwen2.5-VL-3B-Instruct"], test_type=VLMTestType.IMAGE, prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 + img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -237,10 +237,10 @@ VLM_TEST_SETTINGS = { single_image_prompts=IMAGE_ASSETS.prompts( { "stop_sign": "Please describe the image shortly.", - "cherry_blossom": "Please infer the season with reason.", 
# noqa: E501 + "cherry_blossom": "Please infer the season with reason.", } ), - multi_image_prompt="Describe the two images shortly.", # noqa: E501 + multi_image_prompt="Describe the two images shortly.", stop_str=["<|im_end|>"], image_size_factors=[(0.10, 0.15)], max_tokens=64, @@ -252,11 +252,11 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "What's the content in the center of the image?", # noqa: E501 - "cherry_blossom": "What is the season?", # noqa: E501 + "stop_sign": "What's the content in the center of the image?", + "cherry_blossom": "What is the season?", } ), - multi_image_prompt="Describe the two images in detail.", # noqa: E501 + multi_image_prompt="Describe the two images in detail.", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -268,11 +268,11 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "What's the content in the center of the image?", # noqa: E501 - "cherry_blossom": "What is the season?", # noqa: E501 + "stop_sign": "What's the content in the center of the image?", + "cherry_blossom": "What is the season?", } ), - multi_image_prompt="Describe the two images in detail.", # noqa: E501 + multi_image_prompt="Describe the two images in detail.", max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -311,14 +311,14 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "\nWhat's the content in the center of the image?", # noqa: E501 + "stop_sign": "\nWhat's the content in the center of the image?", "cherry_blossom": "\nPlease infer the season with reason in details.", # noqa: E501 } ), multi_image_prompt="image_1:\nimage_2:\nWhich image can we see the car and the tower?", # noqa: E501 patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner, hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output, - stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501 + stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)], ), "fuyu": VLMTestInfo( @@ -342,7 +342,7 @@ VLM_TEST_SETTINGS = { single_image_prompts=IMAGE_ASSETS.prompts( { "stop_sign": "What's the content in the center of the image?", # noqa: E501 - "cherry_blossom": "What is the season?", # noqa: E501 + "cherry_blossom": "What is the season?", } ), multi_image_prompt="Describe the two images in detail.", # noqa: E501 @@ -356,7 +356,7 @@ VLM_TEST_SETTINGS = { "glm4v": VLMTestInfo( models=["zai-org/glm-4v-9b"], test_type=VLMTestType.IMAGE, - prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501 + prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", single_image_prompts=IMAGE_ASSETS.prompts( { "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?", # noqa: E501 @@ -377,9 +377,9 @@ VLM_TEST_SETTINGS = { "glm4_1v": VLMTestInfo( models=["zai-org/GLM-4.1V-9B-Thinking"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), - prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", # noqa: E501 - img_idx_to_prompt=lambda 
idx: "<|begin_of_image|><|image|><|end_of_image|>", # noqa: E501 - video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>", # noqa: E501 + prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>", + img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>", + video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>", max_model_len=2048, max_num_seqs=2, get_stop_token_ids=lambda tok: [151329, 151336, 151338], @@ -410,10 +410,10 @@ VLM_TEST_SETTINGS = { "h2oai/h2ovl-mississippi-2b", ], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), - prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>", # noqa: E501 + prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>", single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "\nWhat's the content in the center of the image?", # noqa: E501 + "stop_sign": "\nWhat's the content in the center of the image?", "cherry_blossom": "\nWhat is the season?", } ), @@ -444,7 +444,7 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501 single_image_prompts=IMAGE_ASSETS.prompts( { - "stop_sign": "\nWhat's the content in the center of the image?", # noqa: E501 + "stop_sign": "\nWhat's the content in the center of the image?", "cherry_blossom": "\nWhat is the season?", } ), @@ -529,7 +529,7 @@ VLM_TEST_SETTINGS = { max_model_len=16384, hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs( "llava-hf/llava-onevision-qwen2-0.5b-ov-hf" - ), # noqa: E501 + ), auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, custom_test_opts=[ @@ -583,7 +583,7 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids( ["<|im_end|>", "<|endoftext|>"] - ), # noqa: E501 + ), hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner, # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49 @@ -598,7 +598,7 @@ VLM_TEST_SETTINGS = { max_num_seqs=2, get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids( ["<|im_end|>", "<|endoftext|>"] - ), # noqa: E501 + ), hf_output_post_proc=model_utils.minicpmv_trunc_hf_output, patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner, ), @@ -627,7 +627,7 @@ VLM_TEST_SETTINGS = { models=["AIDC-AI/Ovis1.6-Gemma2-9B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), prompt_formatter=lambda img_prompt: f"user\n{img_prompt}\nmodel\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", max_model_len=4096, max_num_seqs=2, dtype="half", @@ -640,7 +640,7 @@ VLM_TEST_SETTINGS = { models=["AIDC-AI/Ovis2-1B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", max_model_len=4096, max_num_seqs=2, dtype="half", @@ -652,7 +652,7 @@ VLM_TEST_SETTINGS = { models=["AIDC-AI/Ovis2.5-2B"], test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO), prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 - 
img_idx_to_prompt=lambda idx: "\n", # noqa: E501 + img_idx_to_prompt=lambda idx: "\n", video_idx_to_prompt=lambda idx: "