From d31f7844f8d4e312f0521a69f7aa27c94103b2db Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sun, 19 Oct 2025 20:20:55 +0800 Subject: [PATCH] [Misc] Move utils to avoid conflicts with stdlib, and move tests (#27169) Signed-off-by: DarkLight1337 --- tests/conftest.py | 2 +- tests/lora/test_add_lora.py | 2 +- .../multimodal/generation/test_common.py | 2 +- .../generation/vlm_utils/model_utils.py | 2 +- .../processing/test_tensor_schema.py | 2 +- tests/utils_/test_async_utils.py | 2 +- ...ollections.py => test_collection_utils.py} | 2 +- tests/utils_/test_func_utils.py | 2 +- tests/utils_/test_hashing.py | 25 +++ tests/utils_/test_mem_utils.py | 63 +++++++ tests/utils_/test_torch_utils.py | 104 +++++++++++ tests/utils_/test_utils.py | 176 ------------------ tools/pre_commit/check_pickle_imports.py | 9 +- vllm/benchmarks/throughput.py | 2 +- vllm/entrypoints/chat_utils.py | 2 +- vllm/entrypoints/llm.py | 2 +- vllm/entrypoints/openai/serving_chat.py | 2 +- vllm/entrypoints/openai/serving_completion.py | 4 +- vllm/entrypoints/openai/serving_embedding.py | 4 +- vllm/entrypoints/openai/serving_engine.py | 4 +- vllm/entrypoints/openai/serving_pooling.py | 2 +- vllm/entrypoints/openai/serving_score.py | 2 +- .../tool_parsers/abstract_tool_parser.py | 2 +- vllm/entrypoints/renderer.py | 2 +- vllm/executor/executor_base.py | 2 +- vllm/executor/ray_distributed_executor.py | 2 +- vllm/inputs/parse.py | 2 +- vllm/model_executor/layers/activation.py | 2 +- .../layers/fused_moe/deep_gemm_moe.py | 2 +- .../layers/quantization/gptq.py | 2 +- .../layers/quantization/gptq_marlin.py | 2 +- vllm/model_executor/models/deepseek_vl2.py | 2 +- vllm/model_executor/models/interfaces.py | 2 +- vllm/model_executor/models/interfaces_base.py | 2 +- .../model_executor/models/llava_next_video.py | 2 +- vllm/model_executor/models/minicpmv.py | 2 +- vllm/model_executor/models/qwen3_vl.py | 2 +- vllm/multimodal/inputs.py | 2 +- vllm/multimodal/parse.py | 2 +- vllm/multimodal/processing.py | 6 +- vllm/multimodal/registry.py | 2 +- vllm/reasoning/abs_reasoning_parsers.py | 2 +- vllm/tracing.py | 2 +- vllm/transformers_utils/processor.py | 2 +- vllm/utils/__init__.py | 6 - vllm/utils/{asyncio.py => async_utils.py} | 0 .../{collections.py => collection_utils.py} | 0 vllm/utils/{functools.py => func_utils.py} | 0 vllm/v1/engine/async_llm.py | 6 +- vllm/v1/engine/core_client.py | 2 +- vllm/v1/worker/gpu_input_batch.py | 2 +- vllm/v1/worker/tpu_input_batch.py | 2 +- 52 files changed, 246 insertions(+), 237 deletions(-) rename tests/utils_/{test_collections.py => test_collection_utils.py} (93%) create mode 100644 tests/utils_/test_hashing.py create mode 100644 tests/utils_/test_mem_utils.py create mode 100644 tests/utils_/test_torch_utils.py rename vllm/utils/{asyncio.py => async_utils.py} (100%) rename vllm/utils/{collections.py => collection_utils.py} (100%) rename vllm/utils/{functools.py => func_utils.py} (100%) diff --git a/tests/conftest.py b/tests/conftest.py index 5e94a8322e..ec0179b9cd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,7 +60,7 @@ from vllm.multimodal.utils import fetch_image from vllm.outputs import RequestOutput from vllm.sampling_params import BeamSearchParams from vllm.transformers_utils.utils import maybe_model_redirect -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.torch_utils import set_default_torch_num_threads logger = init_logger(__name__) diff --git a/tests/lora/test_add_lora.py b/tests/lora/test_add_lora.py index 
d3d5e2c1cf..9a82ab99ea 100644 --- a/tests/lora/test_add_lora.py +++ b/tests/lora/test_add_lora.py @@ -12,7 +12,7 @@ from vllm.entrypoints.openai.api_server import ( from vllm.inputs import TextPrompt from vllm.lora.request import LoRARequest from vllm.sampling_params import SamplingParams -from vllm.utils.asyncio import merge_async_iterators +from vllm.utils.async_utils import merge_async_iterators MODEL_PATH = "zai-org/chatglm3-6b" LORA_RANK = 64 diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index d94a3d5cf3..44bbc4479c 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -17,7 +17,7 @@ from transformers import ( ) from vllm.platforms import current_platform -from vllm.utils.functools import identity +from vllm.utils.func_utils import identity from ....conftest import ( IMAGE_ASSETS, diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py index 8329542584..8f0caed4dd 100644 --- a/tests/models/multimodal/generation/vlm_utils/model_utils.py +++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py @@ -25,7 +25,7 @@ from transformers import ( from transformers.video_utils import VideoMetadata from vllm.logprobs import SampleLogprobs -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from .....conftest import HfRunner, ImageAsset, ImageTestAssets from .types import RunnerOutput diff --git a/tests/models/multimodal/processing/test_tensor_schema.py b/tests/models/multimodal/processing/test_tensor_schema.py index 093898dd4b..c0436e1179 100644 --- a/tests/models/multimodal/processing/test_tensor_schema.py +++ b/tests/models/multimodal/processing/test_tensor_schema.py @@ -34,7 +34,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext from vllm.multimodal.utils import group_mm_kwargs_by_modality from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.torch_utils import set_default_torch_dtype from ...registry import _MULTIMODAL_EXAMPLE_MODELS, HF_EXAMPLE_MODELS diff --git a/tests/utils_/test_async_utils.py b/tests/utils_/test_async_utils.py index 070cfb38fb..03d116bdfd 100644 --- a/tests/utils_/test_async_utils.py +++ b/tests/utils_/test_async_utils.py @@ -5,7 +5,7 @@ from collections.abc import AsyncIterator import pytest -from vllm.utils.asyncio import merge_async_iterators +from vllm.utils.async_utils import merge_async_iterators async def _mock_async_iterator(idx: int): diff --git a/tests/utils_/test_collections.py b/tests/utils_/test_collection_utils.py similarity index 93% rename from tests/utils_/test_collections.py rename to tests/utils_/test_collection_utils.py index cb96bf2b0d..19f4a3d1c9 100644 --- a/tests/utils_/test_collections.py +++ b/tests/utils_/test_collection_utils.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import pytest -from vllm.utils.collections import swap_dict_values +from vllm.utils.collection_utils import swap_dict_values @pytest.mark.parametrize( diff --git a/tests/utils_/test_func_utils.py b/tests/utils_/test_func_utils.py index e2b5003fd0..9ce1ada095 100644 --- a/tests/utils_/test_func_utils.py +++ 
b/tests/utils_/test_func_utils.py @@ -4,7 +4,7 @@ import pytest -from vllm.utils.functools import deprecate_kwargs, supports_kw +from vllm.utils.func_utils import deprecate_kwargs, supports_kw from ..utils import error_on_warning diff --git a/tests/utils_/test_hashing.py b/tests/utils_/test_hashing.py new file mode 100644 index 0000000000..484627a547 --- /dev/null +++ b/tests/utils_/test_hashing.py @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import hashlib +import pickle + +import pytest + +from vllm.utils.hashing import sha256 + + +@pytest.mark.parametrize("input", [(), ("abc",), (None,), (None, bool, [1, 2, 3])]) +def test_sha256(input: tuple): + digest = sha256(input) + assert digest is not None + assert isinstance(digest, bytes) + assert digest != b"" + + input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL) + assert digest == hashlib.sha256(input_bytes).digest() + + # hashing again, returns the same value + assert digest == sha256(input) + + # hashing different input, returns different value + assert digest != sha256(input + (1,)) diff --git a/tests/utils_/test_mem_utils.py b/tests/utils_/test_mem_utils.py new file mode 100644 index 0000000000..4b1058be41 --- /dev/null +++ b/tests/utils_/test_mem_utils.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import torch +from vllm_test_utils.monitor import monitor + +from vllm.utils.mem_utils import MemorySnapshot, memory_profiling + +from ..utils import create_new_process_for_each_test + + +@create_new_process_for_each_test() +def test_memory_profiling(): + # Fake out some model loading + inference memory usage to test profiling + # Memory used by other processes will show up as cuda usage outside of torch + from vllm.distributed.device_communicators.cuda_wrapper import CudaRTLibrary + + lib = CudaRTLibrary() + # 512 MiB allocation outside of this instance + handle1 = lib.cudaMalloc(512 * 1024 * 1024) + + baseline_snapshot = MemorySnapshot() + + # load weights + + weights = torch.randn(128, 1024, 1024, device="cuda", dtype=torch.float32) + + weights_memory = 128 * 1024 * 1024 * 4 # 512 MiB + + def measure_current_non_torch(): + free, total = torch.cuda.mem_get_info() + current_used = total - free + current_torch = torch.cuda.memory_reserved() + current_non_torch = current_used - current_torch + return current_non_torch + + with ( + memory_profiling( + baseline_snapshot=baseline_snapshot, weights_memory=weights_memory + ) as result, + monitor(measure_current_non_torch) as monitored_values, + ): + # make a memory spike, 1 GiB + spike = torch.randn(256, 1024, 1024, device="cuda", dtype=torch.float32) + del spike + + # Add some extra non-torch memory 256 MiB (simulate NCCL) + handle2 = lib.cudaMalloc(256 * 1024 * 1024) + + # this is an analytic value, it is exact, + # we only have 256 MiB non-torch memory increase + measured_diff = monitored_values.values[-1] - monitored_values.values[0] + assert measured_diff == 256 * 1024 * 1024 + + # Check that the memory usage is within 5% of the expected values + # 5% tolerance is caused by cuda runtime. 
+ # we cannot control cuda runtime in the granularity of bytes, + # which causes a small error (<10 MiB in practice) + non_torch_ratio = result.non_torch_increase / (256 * 1024 * 1024) # noqa + assert abs(non_torch_ratio - 1) <= 0.05 + assert result.torch_peak_increase == 1024 * 1024 * 1024 + del weights + lib.cudaFree(handle1) + lib.cudaFree(handle2) diff --git a/tests/utils_/test_torch_utils.py b/tests/utils_/test_torch_utils.py new file mode 100644 index 0000000000..4a96627666 --- /dev/null +++ b/tests/utils_/test_torch_utils.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest +import torch + +from vllm.utils.torch_utils import ( + common_broadcastable_dtype, + current_stream, + is_lossless_cast, +) + + +@pytest.mark.parametrize( + ("src_dtype", "tgt_dtype", "expected_result"), + [ + # Different precision_levels + (torch.bool, torch.int8, True), + (torch.bool, torch.float16, True), + (torch.bool, torch.complex32, True), + (torch.int64, torch.bool, False), + (torch.int64, torch.float16, True), + (torch.int64, torch.complex32, True), + (torch.float64, torch.bool, False), + (torch.float64, torch.int8, False), + (torch.float64, torch.complex32, True), + (torch.complex128, torch.bool, False), + (torch.complex128, torch.int8, False), + (torch.complex128, torch.float16, False), + # precision_level=0 + (torch.bool, torch.bool, True), + # precision_level=1 + (torch.int8, torch.int16, True), + (torch.int16, torch.int8, False), + (torch.uint8, torch.int8, False), + (torch.int8, torch.uint8, False), + # precision_level=2 + (torch.float16, torch.float32, True), + (torch.float32, torch.float16, False), + (torch.bfloat16, torch.float32, True), + (torch.float32, torch.bfloat16, False), + # precision_level=3 + (torch.complex32, torch.complex64, True), + (torch.complex64, torch.complex32, False), + ], +) +def test_is_lossless_cast(src_dtype, tgt_dtype, expected_result): + assert is_lossless_cast(src_dtype, tgt_dtype) == expected_result + + +@pytest.mark.parametrize( + ("dtypes", "expected_result"), + [ + ([torch.bool], torch.bool), + ([torch.bool, torch.int8], torch.int8), + ([torch.bool, torch.int8, torch.float16], torch.float16), + ([torch.bool, torch.int8, torch.float16, torch.complex32], torch.complex32), # noqa: E501 + ], +) +def test_common_broadcastable_dtype(dtypes, expected_result): + assert common_broadcastable_dtype(dtypes) == expected_result + + +def test_current_stream_multithread(): + import threading + + if not torch.cuda.is_available(): + pytest.skip("CUDA not available") + + main_default_stream = torch.cuda.current_stream() + child_stream = torch.cuda.Stream() + + thread_stream_ready = threading.Event() + thread_can_exit = threading.Event() + + def child_thread_func(): + with torch.cuda.stream(child_stream): + thread_stream_ready.set() + thread_can_exit.wait(timeout=10) + + child_thread = threading.Thread(target=child_thread_func) + child_thread.start() + + try: + assert thread_stream_ready.wait(timeout=5), ( + "Child thread failed to enter stream context in time" + ) + + main_current_stream = current_stream() + + assert main_current_stream != child_stream, ( + "Main thread's current_stream was contaminated by child thread" + ) + assert main_current_stream == main_default_stream, ( + "Main thread's current_stream is not the default stream" + ) + + # Notify child thread it can exit + thread_can_exit.set() + + finally: + # Ensure child thread exits properly + child_thread.join(timeout=5) + if 
child_thread.is_alive(): + pytest.fail("Child thread failed to exit properly") diff --git a/tests/utils_/test_utils.py b/tests/utils_/test_utils.py index 72771ed139..9028c925b5 100644 --- a/tests/utils_/test_utils.py +++ b/tests/utils_/test_utils.py @@ -2,10 +2,8 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project # ruff: noqa -import hashlib import json import os -import pickle import tempfile from pathlib import Path from unittest.mock import patch @@ -14,7 +12,6 @@ import pytest import torch import yaml from transformers import AutoTokenizer -from vllm_test_utils.monitor import monitor from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens @@ -24,13 +21,6 @@ from vllm.utils import ( bind_kv_cache, unique_filepath, ) -from vllm.utils.hashing import sha256 -from vllm.utils.torch_utils import ( - common_broadcastable_dtype, - current_stream, - is_lossless_cast, -) -from vllm.utils.mem_utils import MemorySnapshot, memory_profiling from ..utils import create_new_process_for_each_test, flat_product @@ -267,61 +257,6 @@ def test_duplicate_dict_args(caplog_vllm, parser): assert "-O.mode" in caplog_vllm.text -@create_new_process_for_each_test() -def test_memory_profiling(): - # Fake out some model loading + inference memory usage to test profiling - # Memory used by other processes will show up as cuda usage outside of torch - from vllm.distributed.device_communicators.cuda_wrapper import CudaRTLibrary - - lib = CudaRTLibrary() - # 512 MiB allocation outside of this instance - handle1 = lib.cudaMalloc(512 * 1024 * 1024) - - baseline_snapshot = MemorySnapshot() - - # load weights - - weights = torch.randn(128, 1024, 1024, device="cuda", dtype=torch.float32) - - weights_memory = 128 * 1024 * 1024 * 4 # 512 MiB - - def measure_current_non_torch(): - free, total = torch.cuda.mem_get_info() - current_used = total - free - current_torch = torch.cuda.memory_reserved() - current_non_torch = current_used - current_torch - return current_non_torch - - with ( - memory_profiling( - baseline_snapshot=baseline_snapshot, weights_memory=weights_memory - ) as result, - monitor(measure_current_non_torch) as monitored_values, - ): - # make a memory spike, 1 GiB - spike = torch.randn(256, 1024, 1024, device="cuda", dtype=torch.float32) - del spike - - # Add some extra non-torch memory 256 MiB (simulate NCCL) - handle2 = lib.cudaMalloc(256 * 1024 * 1024) - - # this is an analytic value, it is exact, - # we only have 256 MiB non-torch memory increase - measured_diff = monitored_values.values[-1] - monitored_values.values[0] - assert measured_diff == 256 * 1024 * 1024 - - # Check that the memory usage is within 5% of the expected values - # 5% tolerance is caused by cuda runtime. 
- # we cannot control cuda runtime in the granularity of bytes, - # which causes a small error (<10 MiB in practice) - non_torch_ratio = result.non_torch_increase / (256 * 1024 * 1024) # noqa - assert abs(non_torch_ratio - 1) <= 0.05 - assert result.torch_peak_increase == 1024 * 1024 * 1024 - del weights - lib.cudaFree(handle1) - lib.cudaFree(handle2) - - def test_bind_kv_cache(): from vllm.attention import Attention @@ -403,56 +338,6 @@ def test_bind_kv_cache_pp(): assert ctx["layers.0.self_attn"].kv_cache[1] is kv_cache[1][0] -@pytest.mark.parametrize( - ("src_dtype", "tgt_dtype", "expected_result"), - [ - # Different precision_levels - (torch.bool, torch.int8, True), - (torch.bool, torch.float16, True), - (torch.bool, torch.complex32, True), - (torch.int64, torch.bool, False), - (torch.int64, torch.float16, True), - (torch.int64, torch.complex32, True), - (torch.float64, torch.bool, False), - (torch.float64, torch.int8, False), - (torch.float64, torch.complex32, True), - (torch.complex128, torch.bool, False), - (torch.complex128, torch.int8, False), - (torch.complex128, torch.float16, False), - # precision_level=0 - (torch.bool, torch.bool, True), - # precision_level=1 - (torch.int8, torch.int16, True), - (torch.int16, torch.int8, False), - (torch.uint8, torch.int8, False), - (torch.int8, torch.uint8, False), - # precision_level=2 - (torch.float16, torch.float32, True), - (torch.float32, torch.float16, False), - (torch.bfloat16, torch.float32, True), - (torch.float32, torch.bfloat16, False), - # precision_level=3 - (torch.complex32, torch.complex64, True), - (torch.complex64, torch.complex32, False), - ], -) -def test_is_lossless_cast(src_dtype, tgt_dtype, expected_result): - assert is_lossless_cast(src_dtype, tgt_dtype) == expected_result - - -@pytest.mark.parametrize( - ("dtypes", "expected_result"), - [ - ([torch.bool], torch.bool), - ([torch.bool, torch.int8], torch.int8), - ([torch.bool, torch.int8, torch.float16], torch.float16), - ([torch.bool, torch.int8, torch.float16, torch.complex32], torch.complex32), # noqa: E501 - ], -) -def test_common_broadcastable_dtype(dtypes, expected_result): - assert common_broadcastable_dtype(dtypes) == expected_result - - def test_model_specification( parser_with_config, cli_config_file, cli_config_file_with_model ): @@ -535,23 +420,6 @@ def test_model_specification( assert args.port == 12312 -@pytest.mark.parametrize("input", [(), ("abc",), (None,), (None, bool, [1, 2, 3])]) -def test_sha256(input: tuple): - digest = sha256(input) - assert digest is not None - assert isinstance(digest, bytes) - assert digest != b"" - - input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL) - assert digest == hashlib.sha256(input_bytes).digest() - - # hashing again, returns the same value - assert digest == sha256(input) - - # hashing different input, returns different value - assert digest != sha256(input + (1,)) - - def test_convert_ids_list_to_tokens(): tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") token_ids = tokenizer.encode("Hello, world!") @@ -561,50 +429,6 @@ def test_convert_ids_list_to_tokens(): assert tokens == ["Hello", ",", " world", "!"] -def test_current_stream_multithread(): - import threading - - if not torch.cuda.is_available(): - pytest.skip("CUDA not available") - - main_default_stream = torch.cuda.current_stream() - child_stream = torch.cuda.Stream() - - thread_stream_ready = threading.Event() - thread_can_exit = threading.Event() - - def child_thread_func(): - with torch.cuda.stream(child_stream): - 
thread_stream_ready.set() - thread_can_exit.wait(timeout=10) - - child_thread = threading.Thread(target=child_thread_func) - child_thread.start() - - try: - assert thread_stream_ready.wait(timeout=5), ( - "Child thread failed to enter stream context in time" - ) - - main_current_stream = current_stream() - - assert main_current_stream != child_stream, ( - "Main thread's current_stream was contaminated by child thread" - ) - assert main_current_stream == main_default_stream, ( - "Main thread's current_stream is not the default stream" - ) - - # Notify child thread it can exit - thread_can_exit.set() - - finally: - # Ensure child thread exits properly - child_thread.join(timeout=5) - if child_thread.is_alive(): - pytest.fail("Child thread failed to exit properly") - - def test_load_config_file(tmp_path): # Define the configuration data config_data = { diff --git a/tools/pre_commit/check_pickle_imports.py b/tools/pre_commit/check_pickle_imports.py index 6a5ac40d64..211abb463e 100644 --- a/tools/pre_commit/check_pickle_imports.py +++ b/tools/pre_commit/check_pickle_imports.py @@ -23,13 +23,14 @@ ALLOWED_FILES = { "vllm/transformers_utils/config.py", "vllm/model_executor/models/registry.py", "vllm/compilation/caching.py", - "tests/utils_/test_utils.py", - "tests/tokenization/test_cached_tokenizer.py", "vllm/distributed/utils.py", "vllm/distributed/parallel_state.py", "vllm/distributed/device_communicators/all_reduce_utils.py", "vllm/distributed/device_communicators/shm_broadcast.py", "vllm/distributed/device_communicators/shm_object_storage.py", + "vllm/utils/hashing.py", + "tests/utils_/test_hashing.py", + "tests/tokenization/test_cached_tokenizer.py", "benchmarks/kernels/graph_machete_bench.py", "benchmarks/kernels/benchmark_lora.py", "benchmarks/kernels/benchmark_machete.py", @@ -40,10 +41,8 @@ ALLOWED_FILES = { "vllm/executor/mp_distributed_executor.py", "vllm/executor/ray_distributed_executor.py", "vllm/entrypoints/llm.py", - "tests/utils.py", - # pickle and cloudpickle "vllm/utils/__init__.py", - "vllm/utils/hashing.py", + "tests/utils.py", } PICKLE_RE = re.compile( diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py index 7610b95034..866365ac18 100644 --- a/vllm/benchmarks/throughput.py +++ b/vllm/benchmarks/throughput.py @@ -34,7 +34,7 @@ from vllm.inputs import TextPrompt, TokensPrompt from vllm.lora.request import LoRARequest from vllm.outputs import RequestOutput from vllm.sampling_params import BeamSearchParams -from vllm.utils.asyncio import merge_async_iterators +from vllm.utils.async_utils import merge_async_iterators def run_vllm( diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 10472b18d8..881447cb20 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -51,7 +51,7 @@ from vllm.transformers_utils.chat_templates import get_chat_template_fallback_pa from vllm.transformers_utils.processor import cached_get_processor from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer from vllm.utils import random_uuid -from vllm.utils.functools import supports_kw +from vllm.utils.func_utils import supports_kw logger = init_logger(__name__) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 30bcb59437..e82db693c9 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -76,7 +76,7 @@ from vllm.transformers_utils.tokenizer import ( ) from vllm.usage.usage_lib import UsageContext from vllm.utils import Counter, Device -from vllm.utils.collections import 
as_iter, is_list_of +from vllm.utils.collection_utils import as_iter, is_list_of from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine.llm_engine import LLMEngine from vllm.v1.sample.logits_processor import LogitsProcessor diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index b5b314e15a..32e6b1d96c 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -70,7 +70,7 @@ from vllm.transformers_utils.tokenizers import ( truncate_tool_call_ids, validate_request_params, ) -from vllm.utils.collections import as_list +from vllm.utils.collection_utils import as_list logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 5c41990205..44211201d4 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -34,8 +34,8 @@ from vllm.logprobs import Logprob from vllm.outputs import RequestOutput from vllm.sampling_params import BeamSearchParams, SamplingParams from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.utils.asyncio import merge_async_iterators -from vllm.utils.collections import as_list +from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.collection_utils import as_list logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index 2e3129cbeb..55f58e7757 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -39,8 +39,8 @@ from vllm.outputs import ( RequestOutput, ) from vllm.pooling_params import PoolingParams -from vllm.utils.asyncio import merge_async_iterators -from vllm.utils.collections import chunk_list +from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.collection_utils import chunk_list logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index ffcde8c302..af5a423134 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -91,13 +91,13 @@ from vllm.tracing import ( ) from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer from vllm.utils import random_uuid -from vllm.utils.asyncio import ( +from vllm.utils.async_utils import ( AsyncMicrobatchTokenizer, collect_from_async_generator, make_async, merge_async_iterators, ) -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.v1.engine import EngineCoreRequest logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py index 102a29fe35..7a27348da3 100644 --- a/vllm/entrypoints/openai/serving_pooling.py +++ b/vllm/entrypoints/openai/serving_pooling.py @@ -36,7 +36,7 @@ from vllm.entrypoints.utils import _validate_truncation_size from vllm.logger import init_logger from vllm.outputs import PoolingOutput, PoolingRequestOutput from vllm.tasks import SupportedTask -from vllm.utils.asyncio import merge_async_iterators +from vllm.utils.async_utils import merge_async_iterators logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py index de3d7fbb2c..9cbfc97918 100644 --- a/vllm/entrypoints/openai/serving_score.py +++ b/vllm/entrypoints/openai/serving_score.py @@ -37,7 
+37,7 @@ from vllm.logger import init_logger from vllm.lora.request import LoRARequest from vllm.outputs import PoolingRequestOutput, ScoringRequestOutput from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer -from vllm.utils.asyncio import make_async, merge_async_iterators +from vllm.utils.async_utils import make_async, merge_async_iterators logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py index 34ec9f8110..4733288644 100644 --- a/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py @@ -12,7 +12,7 @@ from vllm.entrypoints.openai.protocol import ( ) from vllm.logger import init_logger from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.import_utils import import_from_path logger = init_logger(__name__) diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py index 8fbc17e96f..a845528200 100644 --- a/vllm/entrypoints/renderer.py +++ b/vllm/entrypoints/renderer.py @@ -17,7 +17,7 @@ from vllm.inputs.data import TextPrompt as EngineTextPrompt from vllm.inputs.data import TokensPrompt as EngineTokensPrompt from vllm.inputs.parse import get_prompt_components, parse_raw_prompts from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.utils.asyncio import AsyncMicrobatchTokenizer +from vllm.utils.async_utils import AsyncMicrobatchTokenizer @dataclass(frozen=True) diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index cfb7950bcc..9de2249f6c 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -17,7 +17,7 @@ from vllm.logger import init_logger from vllm.lora.request import LoRARequest from vllm.sequence import ExecuteModelRequest from vllm.tasks import SupportedTask -from vllm.utils.asyncio import make_async +from vllm.utils.async_utils import make_async from vllm.v1.outputs import SamplerOutput from vllm.v1.worker.worker_base import WorkerBase diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py index bf5947acb3..8e8901807f 100644 --- a/vllm/executor/ray_distributed_executor.py +++ b/vllm/executor/ray_distributed_executor.py @@ -19,7 +19,7 @@ from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.ray.ray_env import get_env_vars_to_copy from vllm.sequence import ExecuteModelRequest -from vllm.utils.asyncio import make_async +from vllm.utils.async_utils import make_async from vllm.utils.network_utils import ( get_distributed_init_method, get_ip, diff --git a/vllm/inputs/parse.py b/vllm/inputs/parse.py index c84fc098f0..211551be8e 100644 --- a/vllm/inputs/parse.py +++ b/vllm/inputs/parse.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Literal, NamedTuple, TypeAlias, TypedDict, cas from typing_extensions import TypeIs -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from .data import ( EmbedsPrompt, diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py index f48fad559e..fb11227089 100644 --- a/vllm/model_executor/layers/activation.py +++ b/vllm/model_executor/layers/activation.py @@ -17,7 +17,7 @@ from vllm.logger import init_logger from vllm.model_executor.custom_op import CustomOp from 
vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform -from vllm.utils.collections import LazyDict +from vllm.utils.collection_utils import LazyDict logger = init_logger(__name__) diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py index 71776c654b..69a815a4e3 100644 --- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py @@ -31,7 +31,7 @@ from vllm.utils.deep_gemm import ( get_mk_alignment_for_contiguous_layout, m_grouped_fp8_gemm_nt_contiguous, ) -from vllm.utils.functools import run_once +from vllm.utils.func_utils import run_once logger = init_logger(__name__) diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py index 0a1800590b..a3cd68948b 100644 --- a/vllm/model_executor/layers/quantization/gptq.py +++ b/vllm/model_executor/layers/quantization/gptq.py @@ -28,7 +28,7 @@ from vllm.model_executor.parameter import ( RowvLLMParameter, ) from vllm.transformers_utils.config import get_safetensors_params_metadata -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of if TYPE_CHECKING: from vllm.model_executor.layers.quantization import QuantizationMethods diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index 191c7a6388..0d5439357f 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -57,7 +57,7 @@ from vllm.model_executor.parameter import ( from vllm.platforms import current_platform from vllm.scalar_type import scalar_types from vllm.transformers_utils.config import get_safetensors_params_metadata -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of logger = init_logger(__name__) diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py index 56bbaf0da1..3fc8187278 100644 --- a/vllm/model_executor/models/deepseek_vl2.py +++ b/vllm/model_executor/models/deepseek_vl2.py @@ -48,7 +48,7 @@ from vllm.transformers_utils.configs.deepseek_vl2 import ( ) from vllm.transformers_utils.processors.deepseek_vl2 import DeepseekVLV2Processor from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.tensor_schema import TensorSchema, TensorShape from vllm.utils.torch_utils import set_default_torch_dtype diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index 0eef5ab7db..6e046c16b7 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -24,7 +24,7 @@ from vllm.inputs import TokensPrompt from vllm.inputs.data import PromptType from vllm.logger import init_logger from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.utils.functools import supports_kw +from vllm.utils.func_utils import supports_kw from .interfaces_base import VllmModel, is_pooling_model diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py index 0c3c262da4..d87a65a470 100644 --- a/vllm/model_executor/models/interfaces_base.py +++ b/vllm/model_executor/models/interfaces_base.py @@ -15,7 +15,7 @@ import torch.nn as nn from 
typing_extensions import TypeIs, TypeVar from vllm.logger import init_logger -from vllm.utils.functools import supports_kw +from vllm.utils.func_utils import supports_kw if TYPE_CHECKING: from vllm.config import VllmConfig diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py index 56acb3ddf1..77c331b018 100644 --- a/vllm/model_executor/models/llava_next_video.py +++ b/vllm/model_executor/models/llava_next_video.py @@ -33,7 +33,7 @@ from vllm.multimodal.processing import ( ) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py index b4a558ad69..147661babc 100644 --- a/vllm/model_executor/models/minicpmv.py +++ b/vllm/model_executor/models/minicpmv.py @@ -85,7 +85,7 @@ from vllm.multimodal.processing import ( from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors -from vllm.utils.collections import flatten_2d_lists +from vllm.utils.collection_utils import flatten_2d_lists from vllm.utils.tensor_schema import TensorSchema, TensorShape from vllm.utils.torch_utils import set_default_torch_dtype diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 1718cb2603..6955fc80af 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -79,7 +79,7 @@ from vllm.multimodal.processing import ( ) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from .interfaces import ( MultiModalEmbeddings, diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py index e3a0fa562e..a05f54191f 100644 --- a/vllm/multimodal/inputs.py +++ b/vllm/multimodal/inputs.py @@ -22,7 +22,7 @@ from typing import ( import numpy as np from typing_extensions import NotRequired, TypeVar, deprecated -from vllm.utils.collections import full_groupby, is_list_of +from vllm.utils.collection_utils import full_groupby, is_list_of from vllm.utils.import_utils import LazyLoader from vllm.utils.jsontree import json_map_leaves diff --git a/vllm/multimodal/parse.py b/vllm/multimodal/parse.py index 8cb402e951..1ae2c7408a 100644 --- a/vllm/multimodal/parse.py +++ b/vllm/multimodal/parse.py @@ -19,7 +19,7 @@ import numpy as np import torch from typing_extensions import assert_never -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.import_utils import LazyLoader from .audio import AudioResampler diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index d4477d8c85..94122c1d4c 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -25,8 +25,8 @@ from typing_extensions import TypeVar, assert_never from vllm.logger import init_logger from vllm.transformers_utils.processor import cached_processor_from_config from vllm.transformers_utils.tokenizer import AnyTokenizer, decode_tokens, encode_tokens -from vllm.utils.collections import flatten_2d_lists, full_groupby -from 
vllm.utils.functools import get_allowed_kwarg_only_overrides +from vllm.utils.collection_utils import flatten_2d_lists, full_groupby +from vllm.utils.func_utils import get_allowed_kwarg_only_overrides from vllm.utils.jsontree import JSONTree, json_map_leaves from .hasher import MultiModalHasher @@ -486,7 +486,7 @@ _M = TypeVar("_M", bound=_HasModalityAttr | _HasModalityProp) def full_groupby_modality(values: Iterable[_M]) -> ItemsView[str, list[_M]]: """ Convenience function to apply - [`full_groupby`][vllm.utils.collections.full_groupby] + [`full_groupby`][vllm.utils.collection_utils.full_groupby] based on modality. """ return full_groupby(values, key=lambda x: x.modality) diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py index 0ac10bd08b..2e4031bd51 100644 --- a/vllm/multimodal/registry.py +++ b/vllm/multimodal/registry.py @@ -9,7 +9,7 @@ import torch.nn as nn from vllm.config.multimodal import BaseDummyOptions from vllm.logger import init_logger from vllm.transformers_utils.tokenizer import AnyTokenizer, cached_tokenizer_from_config -from vllm.utils.collections import ClassRegistry +from vllm.utils.collection_utils import ClassRegistry from .cache import BaseMultiModalProcessorCache from .processing import ( diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py index ee890e662e..ebd660ca5a 100644 --- a/vllm/reasoning/abs_reasoning_parsers.py +++ b/vllm/reasoning/abs_reasoning_parsers.py @@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any from vllm.entrypoints.tool_server import ToolServer from vllm.logger import init_logger -from vllm.utils.collections import is_list_of +from vllm.utils.collection_utils import is_list_of from vllm.utils.import_utils import import_from_path if TYPE_CHECKING: diff --git a/vllm/tracing.py b/vllm/tracing.py index 4e55c746c8..01bbebf35c 100644 --- a/vllm/tracing.py +++ b/vllm/tracing.py @@ -5,7 +5,7 @@ import os from collections.abc import Mapping from vllm.logger import init_logger -from vllm.utils.functools import run_once +from vllm.utils.func_utils import run_once TRACE_HEADERS = ["traceparent", "tracestate"] diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py index 7f9d554733..98eb9cf335 100644 --- a/vllm/transformers_utils/processor.py +++ b/vllm/transformers_utils/processor.py @@ -16,7 +16,7 @@ from transformers.processing_utils import ProcessorMixin from transformers.video_processing_utils import BaseVideoProcessor from typing_extensions import TypeVar -from vllm.utils.functools import get_allowed_kwarg_only_overrides +from vllm.utils.func_utils import get_allowed_kwarg_only_overrides if TYPE_CHECKING: from vllm.config import ModelConfig diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index e1504d49a4..9a52e99998 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -1122,9 +1122,6 @@ def warn_for_unimplemented_methods(cls: type[T]) -> type[T]: return cls -## moved to vllm.utils.profiling (imported at module top) - - # Only relevant for models using ALiBi (e.g, MPT) def check_use_alibi(model_config: ModelConfig) -> bool: cfg = model_config.hf_text_config @@ -1150,9 +1147,6 @@ def check_use_alibi(model_config: ModelConfig) -> bool: ) -## moved to vllm.utils.hashing - - @cache def _has_module(module_name: str) -> bool: """Return True if *module_name* can be found in the current environment. 
diff --git a/vllm/utils/asyncio.py b/vllm/utils/async_utils.py similarity index 100% rename from vllm/utils/asyncio.py rename to vllm/utils/async_utils.py diff --git a/vllm/utils/collections.py b/vllm/utils/collection_utils.py similarity index 100% rename from vllm/utils/collections.py rename to vllm/utils/collection_utils.py diff --git a/vllm/utils/functools.py b/vllm/utils/func_utils.py similarity index 100% rename from vllm/utils/functools.py rename to vllm/utils/func_utils.py diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 2e8a3150b4..e17cd7beb0 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -30,9 +30,9 @@ from vllm.transformers_utils.config import maybe_register_config_serialize_by_va from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs from vllm.usage.usage_lib import UsageContext from vllm.utils import Device, cdiv -from vllm.utils.asyncio import cancel_task_threadsafe -from vllm.utils.collections import as_list -from vllm.utils.functools import deprecate_kwargs +from vllm.utils.async_utils import cancel_task_threadsafe +from vllm.utils.collection_utils import as_list +from vllm.utils.func_utils import deprecate_kwargs from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine.core_client import EngineCoreClient from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index c99f672f15..9e99454117 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -23,7 +23,7 @@ from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.lora.request import LoRARequest from vllm.tasks import SupportedTask -from vllm.utils.asyncio import in_loop +from vllm.utils.async_utils import in_loop from vllm.utils.network_utils import ( close_sockets, get_open_port, diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 4a9cbeaea0..476c3edefb 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -13,7 +13,7 @@ from vllm.multimodal.inputs import MultiModalFeatureSpec from vllm.pooling_params import PoolingParams from vllm.sampling_params import SamplingParams, SamplingType from vllm.utils import length_from_prompt_token_ids_or_embeds -from vllm.utils.collections import swap_dict_values +from vllm.utils.collection_utils import swap_dict_values from vllm.v1.outputs import LogprobsTensors from vllm.v1.pool.metadata import PoolingMetadata from vllm.v1.sample.logits_processor import ( diff --git a/vllm/v1/worker/tpu_input_batch.py b/vllm/v1/worker/tpu_input_batch.py index f52d92afab..74e8225b2f 100644 --- a/vllm/v1/worker/tpu_input_batch.py +++ b/vllm/v1/worker/tpu_input_batch.py @@ -10,7 +10,7 @@ import torch from vllm.lora.request import LoRARequest from vllm.sampling_params import SamplingType from vllm.utils import length_from_prompt_token_ids_or_embeds -from vllm.utils.collections import swap_dict_values +from vllm.utils.collection_utils import swap_dict_values from vllm.v1.outputs import LogprobsTensors from vllm.v1.worker.block_table import MultiGroupBlockTable from vllm.v1.worker.gpu_input_batch import CachedRequestState
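
Note for downstream callers (illustrative only, not part of the patch itself): the three renamed modules are moved with "similarity index 100%", i.e. their contents are unchanged, so updating an import is a pure path substitution. A minimal before/after sketch using helpers that already appear in this diff:

    # Before this patch
    from vllm.utils.asyncio import merge_async_iterators, make_async
    from vllm.utils.collections import as_list, is_list_of, swap_dict_values
    from vllm.utils.functools import deprecate_kwargs, run_once, supports_kw

    # After this patch (module names no longer shadow the stdlib)
    from vllm.utils.async_utils import merge_async_iterators, make_async
    from vllm.utils.collection_utils import as_list, is_list_of, swap_dict_values
    from vllm.utils.func_utils import deprecate_kwargs, run_once, supports_kw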