From 9715f7bb0fd70fa3dac6f35c824e90e58f0086ce Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Wed, 27 Aug 2025 03:01:25 +0800
Subject: [PATCH] [Bugfix] Fix incorrect original shape in hashing (#23672)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Lukas Geiger <lukas.geiger94@gmail.com>
---
 tests/multimodal/test_hasher.py |  7 ++++---
 vllm/multimodal/hasher.py       | 10 ++++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/multimodal/test_hasher.py b/tests/multimodal/test_hasher.py
index 75a233c256..2751e38760 100644
--- a/tests/multimodal/test_hasher.py
+++ b/tests/multimodal/test_hasher.py
@@ -45,10 +45,11 @@ def test_hash_collision_image_transpose():
     assert hasher.hash_kwargs(image=image1) != hasher.hash_kwargs(image=image2)
 
 
-def test_hash_collision_tensor_shape():
+@pytest.mark.parametrize("dtype", [torch.float32, torch.bfloat16])
+def test_hash_collision_tensor_shape(dtype):
     # The hash should be different though the data is the same when flattened
-    arr1 = torch.zeros((5, 10, 20, 3))
-    arr2 = torch.zeros((10, 20, 5, 3))
+    arr1 = torch.zeros((5, 10, 20, 3), dtype=dtype)
+    arr2 = torch.zeros((10, 20, 5, 3), dtype=dtype)
 
     hasher = MultiModalHasher
     assert hasher.hash_kwargs(data=arr1) != hasher.hash_kwargs(data=arr2)
diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py
index 479961776a..3708dc7065 100644
--- a/vllm/multimodal/hasher.py
+++ b/vllm/multimodal/hasher.py
@@ -45,16 +45,22 @@ class MultiModalHasher:
         if isinstance(obj, torch.Tensor):
             tensor_obj: torch.Tensor = obj.cpu()
             tensor_dtype = tensor_obj.dtype
+            tensor_shape = tensor_obj.shape
+
+            # NumPy does not support bfloat16, so view the tensor as a
+            # contiguous 1D uint8 array and record its dtype/shape separately.
             if tensor_dtype == torch.bfloat16:
                 tensor_obj = tensor_obj.contiguous()
                 tensor_obj = tensor_obj.view(
                     (tensor_obj.numel(), )).view(torch.uint8)
+
                 return cls.item_to_bytes(
                     "tensor", {
                         "original_dtype": str(tensor_dtype),
-                        "original_shape": tuple(tensor_obj.shape),
-                        "data": tensor_obj.numpy()
+                        "original_shape": tuple(tensor_shape),
+                        "data": tensor_obj.numpy(),
                     })
+
             return cls.item_to_bytes("tensor", tensor_obj.numpy())
         if isinstance(obj, np.ndarray):
             # If the array is non-contiguous, we need to copy it first