[CI] Remove benchmark patch and increase the scheduler frequency (#1762)

### What this PR does / why we need it?
This PR does the following:
1. Remove `benchmark_datasets.py` patch
2. Increase the scheduler frequency to twice per day. Because of the
recent large number of daily submissions, we need to increase the
default test time (6h).
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.9.2
- vLLM main:
247102f07f

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-07-13 20:00:35 +08:00
committed by GitHub
parent d118bf8a26
commit 9cd4ac76a1
4 changed files with 4 additions and 84 deletions

View File

@ -20,8 +20,9 @@ name: 'Benchmarks / Performance'
on:
schedule:
# Run at 02:00 everyday
- cron: '00 18 * * *'
# Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8)
- cron: "0 12 * * *"
- cron: "0 19 * * *"
workflow_dispatch:
# Allow manual triggering of the workflow

View File

@ -1,5 +1,4 @@
pandas
datasets
modelscope
libcst
tabulate

View File

@ -1,79 +0,0 @@
import os
from argparse import ArgumentParser
import libcst as cst
import libcst.matchers as m
# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
class StreamingFalseTransformer(cst.CSTTransformer):
    """Force ``streaming=False`` on ``load_dataset`` calls in one method.

    Only calls to a bare ``load_dataset(...)`` name that appear inside a
    function called ``load_data`` within a class called
    ``HuggingFaceDataset`` are rewritten, and only when the call already
    passes a ``streaming=`` keyword argument. Everything else is returned
    unchanged.

    Attributes:
        in_target_class: True while the traversal is inside the
            ``HuggingFaceDataset`` class body.
        in_target_func: True while the traversal is inside the
            ``load_data`` function of that class.
    """

    def __init__(self):
        super().__init__()
        self.in_target_class = False
        self.in_target_func = False
        # The exact nodes that switched the flags on. Leaving any *other*
        # class/function (e.g. a helper nested inside ``load_data``) must not
        # clear the flags, otherwise calls after the nested definition would
        # silently be skipped.
        self._target_class_node = None
        self._target_func_node = None

    def visit_ClassDef(self, node):
        """Mark entry into the ``HuggingFaceDataset`` class."""
        if not self.in_target_class and node.name.value == "HuggingFaceDataset":
            self.in_target_class = True
            self._target_class_node = node

    def leave_ClassDef(self, original_node, updated_node):
        """Clear the class flag only when leaving the class that set it."""
        if original_node is self._target_class_node:
            self.in_target_class = False
            self._target_class_node = None
        return updated_node

    def visit_FunctionDef(self, node):
        """Mark entry into ``load_data`` when inside the target class."""
        if (
            self.in_target_class
            and not self.in_target_func
            and node.name.value == "load_data"
        ):
            self.in_target_func = True
            self._target_func_node = node

    def leave_FunctionDef(self, original_node, updated_node):
        """Clear the function flag only when leaving the function that set it."""
        if original_node is self._target_func_node:
            self.in_target_func = False
            self._target_func_node = None
        return updated_node

    def leave_Call(self, original_node, updated_node):
        """Replace the value of ``streaming=`` with ``False`` on target calls.

        Returns:
            ``updated_node`` with its ``streaming`` keyword argument (if any)
            set to the name ``False`` when the call is ``load_dataset(...)``
            inside the target method; otherwise ``updated_node`` unchanged.
        """
        if not (self.in_target_class and self.in_target_func):
            return updated_node
        if not m.matches(updated_node.func, m.Name("load_dataset")):
            return updated_node

        new_args = [
            arg.with_changes(value=cst.Name("False"))
            if arg.keyword and arg.keyword.value == "streaming"
            else arg
            for arg in updated_node.args
        ]
        return updated_node.with_changes(args=new_args)
def patch_file(path):
    """Rewrite the file at *path* in place using ``StreamingFalseTransformer``.

    The path is resolved to an absolute path first. If nothing exists there,
    a "File not found" message is printed and the function returns without
    touching the filesystem. Otherwise the file is read as UTF-8, parsed,
    transformed, written back as UTF-8, and a "Patched" message is printed.

    Args:
        path: Location of the Python source file to patch.
    """
    target = os.path.abspath(path)

    if os.path.exists(target):
        with open(target, "r", encoding="utf-8") as reader:
            original_text = reader.read()

        # Parse and transform before reopening for writing, so a failure in
        # either step cannot leave the file truncated.
        tree = cst.parse_module(original_text)
        patched_tree = tree.visit(StreamingFalseTransformer())

        with open(target, "w", encoding="utf-8") as writer:
            writer.write(patched_tree.code)
        print(f"Patched: {target}")
    else:
        print(f"File not found: {target}")
def build_arg_parser():
    """Build the command-line parser for this patch script.

    Returns:
        An ``ArgumentParser`` with a single optional ``--path`` option whose
        default is ``/vllm-workspace/vllm/vllm/benchmarks/datasets.py``.
    """
    parser = ArgumentParser(
        description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
    )
    # ``--path`` must be registered exactly once: adding the same option
    # string twice makes argparse raise ArgumentError (conflicting option
    # string) before any argument is parsed.
    parser.add_argument(
        "--path",
        type=str,
        default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
        help="Path to the benchmark_dataset.py file",
    )
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    patch_file(args.path)

View File

@ -281,7 +281,6 @@ cleanup_on_error() {
main() {
START_TIME=$(date +%s)
check_npus
python3 benchmarks/scripts/patch_benchmark_dataset.py
# dependencies
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)