[PERF] Use pybase64 to more quickly decode prompt embeddings (#22469)

Signed-off-by: Andrew Sansom <andrew@protopia.ai>
This commit is contained in:
Andrew Sansom
2025-08-07 21:15:32 -05:00
committed by GitHub
parent 1ee5ead5f8
commit e2c8f1edec

View File

@@ -1,7 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
-import base64
import io
import json
import sys
@@ -12,6 +11,7 @@ from http import HTTPStatus
from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
TypeVar, Union, cast, overload)
+import pybase64
import torch
from fastapi import Request
from pydantic import BaseModel, ConfigDict, Field
@@ -1008,7 +1008,8 @@ class OpenAIServing:
) -> list[EmbedsPrompt]:
def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
-tensor = torch.load(io.BytesIO(base64.b64decode(embed)),
+tensor = torch.load(io.BytesIO(
+pybase64.b64decode(embed, validate=True)),
weights_only=True)
assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
torch.float32,