Compare commits

...

1 Commits

Author SHA1 Message Date
dabc03baa7 updated
Signed-off-by: Robert Shaw <robshaw@redhat.com>
2025-08-19 17:05:49 +00:00
3 changed files with 23 additions and 7 deletions

View File

@ -840,3 +840,8 @@ Key capabilities:
The following example shows how to deploy a large model like DeepSeek R1 with Ray Serve LLM: <gh-file:examples/online_serving/ray_serve_deepseek.py>. The following example shows how to deploy a large model like DeepSeek R1 with Ray Serve LLM: <gh-file:examples/online_serving/ray_serve_deepseek.py>.
Learn more about Ray Serve LLM with the official [Ray Serve LLM documentation](https://docs.ray.io/en/latest/serve/llm/serving-llms.html). Learn more about Ray Serve LLM with the official [Ray Serve LLM documentation](https://docs.ray.io/en/latest/serve/llm/serving-llms.html).
curl http://localhost:8002/v1/rerank -H "Content-Type: application/json" -d '{
"query": "What is the capital of France?",
"documents": ["The capital of France is Paris.", "The capital of Germany is Berlin."]
}'

View File

@ -5,10 +5,4 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req
numba == 0.61.2; python_version > '3.9' numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs # Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. torch==2.8.0
torch==2.7.1
torchaudio==2.7.1
# These must be updated alongside torch
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7

View File

@ -39,3 +39,20 @@ def test_models(example_prompts, model_name) -> None:
expected_str = EXPECTED_STRS_MAP[model_name][i] expected_str = EXPECTED_STRS_MAP[model_name][i]
assert expected_str == output_str, ( assert expected_str == output_str, (
f"Expected: {expected_str!r}\nvLLM: {output_str!r}") f"Expected: {expected_str!r}\nvLLM: {output_str!r}")
curl http://localhost:8002/v1/embeddings \
-H "Content-Type: application/json" \
-d '{
"input": "Query: What is the capital of France? \n\nDocuments: \n1. Paris is the capital city of France.\n2. Berlin is the capital of Germany.\n \n Rank the documents from most to least relevant to the query and provide a relevance score",
"model": "$MODEL",
"encoding_format": "float"
}'
curl http://localhost:8002/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"input": "Query: What is the capital of France? \n\nDocuments: \n1. Paris is the capital city of France.\n2. Berlin is the capital of Germany.\n \n Rank the documents from most to least relevant to the query and provide a relevance score",
"prompt": "Query: What is the capital of France? \n\nDocuments: \n1. Paris is the capital city of France.\n2. Berlin is the capital of Germany.\n \n Rank the documents from most to least relevant to the query and provide a relevance score",
"model": "BAAI/bge-reranker-v2-m3"
}'