[PERF] Use pybase64 to more quickly decode prompt embeddings (#22469)
Signed-off-by: Andrew Sansom <andrew@protopia.ai>
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
-import base64
 import io
 import json
 import sys
@@ -12,6 +11,7 @@ from http import HTTPStatus
 from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
                     TypeVar, Union, cast, overload)
 
+import pybase64
 import torch
 from fastapi import Request
 from pydantic import BaseModel, ConfigDict, Field
@@ -1008,7 +1008,8 @@ class OpenAIServing:
     ) -> list[EmbedsPrompt]:
 
         def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
-            tensor = torch.load(io.BytesIO(base64.b64decode(embed)),
+            tensor = torch.load(io.BytesIO(
+                pybase64.b64decode(embed, validate=True)),
                                 weights_only=True)
             assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
                 torch.float32,
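For context, a minimal round-trip sketch (not part of the commit) of how a client might base64-encode prompt embeddings and how the server-side helper above decodes them; the tensor shape and variable names are illustrative assumptions, not vLLM API.

import io

import pybase64
import torch

# Client side: serialize a float32 embeddings tensor, then base64-encode it.
embeds = torch.randn(8, 4096, dtype=torch.float32)  # hypothetical shape
buf = io.BytesIO()
torch.save(embeds, buf)
encoded: bytes = pybase64.b64encode(buf.getvalue())

# Server side, mirroring _load_and_validate_embed above: validate=True makes
# pybase64 reject malformed input instead of silently ignoring stray bytes.
tensor = torch.load(io.BytesIO(pybase64.b64decode(encoded, validate=True)),
                    weights_only=True)
assert torch.equal(tensor, embeds)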
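Why pybase64: it is an API-compatible drop-in for the stdlib base64 module with SIMD-accelerated encoding and decoding. A minimal micro-benchmark sketch, assuming a payload sized like a serialized prompt-embeddings tensor (timings vary by machine; this only shows how one might measure the difference):

import base64
import os
import timeit

import pybase64

# ~128 KiB of pseudo-tensor bytes, base64-encoded once up front.
payload = base64.b64encode(os.urandom(8 * 4096 * 4))

t_std = timeit.timeit(lambda: base64.b64decode(payload), number=1_000)
t_fast = timeit.timeit(lambda: pybase64.b64decode(payload, validate=True),
                       number=1_000)
print(f"stdlib base64: {t_std:.4f}s   pybase64: {t_fast:.4f}s")

Note that validate=True also tightens input checking: the stdlib default silently discards characters outside the base64 alphabet before decoding, while this call raises on malformed input.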