[PERF] Use pybase64 to more quickly decode prompt embeddings (#22469)
Signed-off-by: Andrew Sansom <andrew@protopia.ai>
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
-import base64
 import io
 import json
 import sys
@@ -12,6 +11,7 @@ from http import HTTPStatus
 from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
                     TypeVar, Union, cast, overload)
 
+import pybase64
 import torch
 from fastapi import Request
 from pydantic import BaseModel, ConfigDict, Field
@@ -1008,7 +1008,8 @@ class OpenAIServing:
     ) -> list[EmbedsPrompt]:
 
         def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
-            tensor = torch.load(io.BytesIO(base64.b64decode(embed)),
+            tensor = torch.load(io.BytesIO(
+                pybase64.b64decode(embed, validate=True)),
                                 weights_only=True)
             assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
                 torch.float32,
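For context, a minimal round-trip sketch (not part of the commit) of how a client might base64-encode prompt embeddings and how the server-side helper above decodes them; the tensor shape and variable names are illustrative assumptions, not vLLM API.

import io

import pybase64
import torch

# Client side: serialize a float32 embeddings tensor, then base64-encode it.
embeds = torch.randn(8, 4096, dtype=torch.float32)  # hypothetical shape
buf = io.BytesIO()
torch.save(embeds, buf)
encoded: bytes = pybase64.b64encode(buf.getvalue())

# Server side, mirroring _load_and_validate_embed above: validate=True makes
# pybase64 reject malformed input instead of silently ignoring stray bytes.
tensor = torch.load(io.BytesIO(pybase64.b64decode(encoded, validate=True)),
                    weights_only=True)
assert torch.equal(tensor, embeds)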
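Why pybase64: it is an API-compatible drop-in for the stdlib base64 module with SIMD-accelerated encoding and decoding. A minimal micro-benchmark sketch, assuming a payload sized like a serialized prompt-embeddings tensor (timings vary by machine; this only shows how one might measure the difference):

import base64
import os
import timeit

import pybase64

# ~128 KiB of pseudo-tensor bytes, base64-encoded once up front.
payload = base64.b64encode(os.urandom(8 * 4096 * 4))

t_std = timeit.timeit(lambda: base64.b64decode(payload), number=1_000)
t_fast = timeit.timeit(lambda: pybase64.b64decode(payload, validate=True),
                       number=1_000)
print(f"stdlib base64: {t_std:.4f}s   pybase64: {t_fast:.4f}s")

Note that validate=True also tightens input checking: the stdlib default silently discards characters outside the base64 alphabet before decoding, while this call raises on malformed input.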