Mirror of https://github.com/huggingface/transformers.git, synced 2025-10-20 17:13:56 +08:00
Enable ruff on benchmark and scripts (#40634)
* Enable ruff on benchmark and scripts
  Signed-off-by: cyy <cyyever@outlook.com>
* Cover benchmark_v2
  Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
* correct
* style
* style
---------
Signed-off-by: cyy <cyyever@outlook.com>
Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Co-authored-by: Cyril Vallez <cyril.vallez@gmail.com>
Makefile
@@ -3,7 +3,7 @@
# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
export PYTHONPATH = src

check_dirs := examples tests src utils
check_dirs := examples tests src utils scripts benchmark benchmark_v2

exclude_folders := ""
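For context, `check_dirs` is the directory list the repo's lint targets iterate over, so adding `scripts`, `benchmark`, and `benchmark_v2` here is what puts them under ruff. A minimal sketch of how such targets typically consume the variable is shown below; the target names and exact flags are illustrative assumptions, not part of this commit:

# Hypothetical excerpt of the lint targets; the real Makefile may differ.
quality:
	ruff check $(check_dirs)
	ruff format --check $(check_dirs)

style:
	ruff check $(check_dirs) --fix
	ruff format $(check_dirs)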
@@ -11,25 +11,28 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from logging import Logger
import os
import sys
from logging import Logger
from threading import Event, Thread
from time import perf_counter, sleep
from typing import Optional
import sys

# Add the parent directory to Python path to import benchmarks_entrypoint
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from benchmarks_entrypoint import MetricsRecorder

import gpustat
import psutil
import psycopg2
from benchmarks_entrypoint import MetricsRecorder

# Optional heavy ML dependencies - only required when actually running the benchmark
try:
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache

TRANSFORMERS_AVAILABLE = True
except ImportError:
TRANSFORMERS_AVAILABLE = False
@@ -63,7 +66,13 @@ def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):

def run_benchmark(
logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, metrics_recorder=None, num_tokens_to_generate=100
logger: Logger,
repository: str,
branch: str,
commit_id: str,
commit_msg: str,
metrics_recorder=None,
num_tokens_to_generate=100,
):
# Check if required ML dependencies are available
if not TRANSFORMERS_AVAILABLE:
@@ -154,7 +163,7 @@ def run_benchmark(
# First eager forward pass
logger.info("running first eager forward pass")
start = perf_counter()
outputs = model(**inputs)
_ = model(**inputs)
torch.cuda.synchronize()
end = perf_counter()
first_eager_fwd_pass_time = end - start
@@ -163,7 +172,7 @@ def run_benchmark(
# Second eager forward pass (should be faster)
logger.info("running second eager forward pass")
start = perf_counter()
outputs = model(**inputs)
_ = model(**inputs)
torch.cuda.synchronize()
end = perf_counter()
second_eager_fwd_pass_time = end - start
@@ -31,9 +31,7 @@ from contextlib import contextmanager
from pathlib import Path

from git import Repo

from huggingface_hub import HfApi

from optimum_benchmark import Benchmark
from optimum_benchmark_wrapper import main
@@ -13,19 +13,20 @@
# limitations under the License.
import argparse
import importlib.util
import json
import logging
import os
import sys
import json
import uuid
from datetime import datetime
from typing import Dict, Tuple, Optional, List

import pandas as pd

try:
from psycopg2.extensions import register_adapter
from psycopg2.extras import Json

register_adapter(dict, Json)
PSYCOPG2_AVAILABLE = True
except ImportError:
@@ -38,8 +39,14 @@ class ImportModuleException(Exception):

class MetricsRecorder:
def __init__(
self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str,
collect_csv_data: bool = True
self,
connection,
logger: logging.Logger,
repository: str,
branch: str,
commit_id: str,
commit_msg: str,
collect_csv_data: bool = True,
):
self.conn = connection
self.use_database = connection is not None
@@ -55,23 +62,39 @@ class MetricsRecorder:
# For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
if self.collect_csv_data:
# Initialize empty DataFrames with proper schemas
self.benchmarks_df = pd.DataFrame(columns=[
'benchmark_id', 'repository', 'branch', 'commit_id', 'commit_message',
'metadata', 'created_at'
])
self.device_measurements_df = pd.DataFrame(columns=[
'benchmark_id', 'cpu_util', 'mem_megabytes', 'gpu_util',
'gpu_mem_megabytes', 'time'
])
self.model_measurements_df = pd.DataFrame(columns=[
'benchmark_id', 'time', 'model_load_time', 'first_eager_forward_pass_time_secs',
'second_eager_forward_pass_time_secs', 'first_eager_generate_time_secs',
'second_eager_generate_time_secs', 'time_to_first_token_secs',
'time_to_second_token_secs', 'time_to_third_token_secs',
'time_to_next_token_mean_secs', 'first_compile_generate_time_secs',
'second_compile_generate_time_secs', 'third_compile_generate_time_secs',
'fourth_compile_generate_time_secs'
])
self.benchmarks_df = pd.DataFrame(
columns=[
"benchmark_id",
"repository",
"branch",
"commit_id",
"commit_message",
"metadata",
"created_at",
]
)
self.device_measurements_df = pd.DataFrame(
columns=["benchmark_id", "cpu_util", "mem_megabytes", "gpu_util", "gpu_mem_megabytes", "time"]
)
self.model_measurements_df = pd.DataFrame(
columns=[
"benchmark_id",
"time",
"model_load_time",
"first_eager_forward_pass_time_secs",
"second_eager_forward_pass_time_secs",
"first_eager_generate_time_secs",
"second_eager_generate_time_secs",
"time_to_first_token_secs",
"time_to_second_token_secs",
"time_to_third_token_secs",
"time_to_next_token_mean_secs",
"first_compile_generate_time_secs",
"second_compile_generate_time_secs",
"third_compile_generate_time_secs",
"fourth_compile_generate_time_secs",
]
)
else:
self.benchmarks_df = None
self.device_measurements_df = None
@@ -95,15 +118,19 @@ class MetricsRecorder:
# Store benchmark data for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame
new_row = pd.DataFrame([{
'benchmark_id': benchmark_id,
'repository': self.repository,
'branch': self.branch,
'commit_id': self.commit_id,
'commit_message': self.commit_msg,
'metadata': json.dumps(metadata),
'created_at': datetime.utcnow().isoformat()
}])
new_row = pd.DataFrame(
[
{
"benchmark_id": benchmark_id,
"repository": self.repository,
"branch": self.branch,
"commit_id": self.commit_id,
"commit_message": self.commit_msg,
"metadata": json.dumps(metadata),
"created_at": datetime.utcnow().isoformat(),
}
]
)
self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)

mode_info = []
@@ -123,14 +150,18 @@ class MetricsRecorder:
# Store device measurements for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame
new_row = pd.DataFrame([{
'benchmark_id': benchmark_id,
'cpu_util': cpu_util,
'mem_megabytes': mem_megabytes,
'gpu_util': gpu_util,
'gpu_mem_megabytes': gpu_mem_megabytes,
'time': datetime.utcnow().isoformat()
}])
new_row = pd.DataFrame(
[
{
"benchmark_id": benchmark_id,
"cpu_util": cpu_util,
"mem_megabytes": mem_megabytes,
"gpu_util": gpu_util,
"gpu_mem_megabytes": gpu_mem_megabytes,
"time": datetime.utcnow().isoformat(),
}
]
)
self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)

# Store in database if available
@@ -149,10 +180,7 @@ class MetricsRecorder:
# Store model measurements for CSV export (if enabled)
if self.collect_csv_data:
# Add row to pandas DataFrame with flattened measurements
row_data = {
'benchmark_id': benchmark_id,
'time': datetime.utcnow().isoformat()
}
row_data = {"benchmark_id": benchmark_id, "time": datetime.utcnow().isoformat()}
# Flatten the measurements dict into the row
row_data.update(measurements)

@@ -241,28 +269,34 @@ class MetricsRecorder:
# Add model measurements (join on benchmark_id)
if len(self.model_measurements_df) > 0:
# Drop 'time' column from model measurements to avoid conflicts
model_df = self.model_measurements_df.drop(columns=['time'], errors='ignore')
summary_df = summary_df.merge(model_df, on='benchmark_id', how='left')
model_df = self.model_measurements_df.drop(columns=["time"], errors="ignore")
summary_df = summary_df.merge(model_df, on="benchmark_id", how="left")

# Calculate device measurement aggregates using pandas groupby
if len(self.device_measurements_df) > 0:
device_agg = self.device_measurements_df.groupby('benchmark_id').agg({
'cpu_util': ['mean', 'max', 'std', 'count'],
'mem_megabytes': ['mean', 'max', 'std'],
'gpu_util': ['mean', 'max', 'std'],
'gpu_mem_megabytes': ['mean', 'max', 'std']
}).round(3)
device_agg = (
self.device_measurements_df.groupby("benchmark_id")
.agg(
{
"cpu_util": ["mean", "max", "std", "count"],
"mem_megabytes": ["mean", "max", "std"],
"gpu_util": ["mean", "max", "std"],
"gpu_mem_megabytes": ["mean", "max", "std"],
}
)
.round(3)
)

# Flatten column names
device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
device_agg = device_agg.reset_index()

# Rename count column to be more descriptive
if 'cpu_util_count' in device_agg.columns:
device_agg = device_agg.rename(columns={'cpu_util_count': 'device_measurement_count'})
if "cpu_util_count" in device_agg.columns:
device_agg = device_agg.rename(columns={"cpu_util_count": "device_measurement_count"})

# Merge with summary
summary_df = summary_df.merge(device_agg, on='benchmark_id', how='left')
summary_df = summary_df.merge(device_agg, on="benchmark_id", how="left")

# Export the comprehensive summary
summary_df.to_csv(summary_file, index=False)
@@ -313,18 +347,13 @@ def parse_arguments() -> tuple[str, str, str, str, bool, str]:
help="The commit message associated with the commit, truncated to 70 characters.",
)

parser.add_argument(
"--csv",
action="store_true",
default=False,
help="Enable CSV output files generation."
)
parser.add_argument("--csv", action="store_true", default=False, help="Enable CSV output files generation.")

parser.add_argument(
"--csv-output-dir",
type=str,
default="benchmark_results",
help="Directory for CSV output files (default: benchmark_results)."
help="Directory for CSV output files (default: benchmark_results).",
)

args = parser.parse_args()
@@ -356,6 +385,7 @@ def create_database_connection():

try:
import psycopg2

conn = psycopg2.connect("dbname=metrics")
logger.info("Successfully connected to database")
return conn
@@ -364,8 +394,9 @@ def create_database_connection():
return None

def create_global_metrics_recorder(repository: str, branch: str, commit_id: str, commit_msg: str,
generate_csv: bool = False) -> MetricsRecorder:
def create_global_metrics_recorder(
repository: str, branch: str, commit_id: str, commit_msg: str, generate_csv: bool = False
) -> MetricsRecorder:
"""
Create a global metrics recorder that will be used across all benchmarks.
"""
@@ -415,7 +446,7 @@ if __name__ == "__main__":
try:
logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
module = import_from_path(entry.name.split(".")[0], entry.path)
if hasattr(module, 'run_benchmark'):
if hasattr(module, "run_benchmark"):
benchmark_modules.append(entry.name)
logger.debug(f"discovered benchmark: {entry.name}")
else:
@@ -443,7 +474,9 @@ if __name__ == "__main__":
module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
except TypeError:
# Fall back to the old signature for backward compatibility
logger.warning(f"Module {module_name} using old run_benchmark signature - database connection will be created per module")
logger.warning(
f"Module {module_name} using old run_benchmark signature - database connection will be created per module"
)
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)

successful_benchmarks += 1
@@ -3,7 +3,11 @@ import subprocess

def main(config_dir, config_name, args):
subprocess.run(["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"] + ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"] + args)
subprocess.run(
["optimum-benchmark", "--config-dir", f"{config_dir}", "--config-name", f"{config_name}"]
+ ["hydra/job_logging=disabled", "hydra/hydra_logging=disabled"]
+ args
)

if __name__ == "__main__":
@@ -12,18 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import logging
from typing import Dict, Any, List

from benchmark_framework import ModelBenchmark
import os
from typing import Any

import torch
from benchmark_framework import ModelBenchmark

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "1"
torch.set_float32_matmul_precision("high")

class LLaMABenchmark(ModelBenchmark):
"""Simplified LLaMA model benchmark implementation using the ModelBenchmark base class."""

@@ -31,9 +32,7 @@ class LLaMABenchmark(ModelBenchmark):
super().__init__(logger)
self._default_prompt = "Why dogs are so cute?" # Custom prompt for LLaMA

def get_scenario_configs(self) -> List[Dict[str, Any]]:
def get_scenario_configs(self) -> list[dict[str, Any]]:
"""
Get LLaMA-specific scenario configurations.

@@ -43,24 +42,33 @@ class LLaMABenchmark(ModelBenchmark):
return [
# Eager variants
{"variant": "eager", "compile_mode": None, "use_cache": True, "description": "Eager execution with cache"},

# Compiled variants
{"variant": "compiled", "compile_mode": "max-autotune", "use_cache": True, "description": "Compiled with max autotune"},

{
"variant": "compiled",
"compile_mode": "max-autotune",
"use_cache": True,
"description": "Compiled with max autotune",
},
# Kernelized variant (if available)
{"variant": "kernelized", "compile_mode": "max-autotune", "use_cache": True, "description": "Kernelized execution"},
{
"variant": "kernelized",
"compile_mode": "max-autotune",
"use_cache": True,
"description": "Kernelized execution",
},
]

def _is_kernelization_available(self) -> bool:
"""Check if kernelization is available for LLaMA."""
try:
from kernels import Mode, kernelize
from kernels import Mode, kernelize # noqa: F401

return True
except ImportError:
self.logger.debug("Kernelization not available: kernels module not found")
return False

def get_default_generation_config(self) -> Dict[str, Any]:
def get_default_generation_config(self) -> dict[str, Any]:
"""Get LLaMA-specific generation configuration."""
return {
"do_sample": False,
@@ -70,9 +78,8 @@ class LLaMABenchmark(ModelBenchmark):
"max_new_tokens": None, # Will be set per scenario
}

def get_model_init_kwargs(self, config) -> Dict[str, Any]:
def get_model_init_kwargs(self, config) -> dict[str, Any]:
"""Get LLaMA-specific model initialization kwargs."""
from benchmark_framework import BenchmarkConfig
return {
"torch_dtype": getattr(torch, config.torch_dtype),
"attn_implementation": config.attn_implementation,
@@ -103,18 +110,20 @@ def run_llama(logger, output_dir, **kwargs):
from benchmark_framework import BenchmarkRunner

# Extract parameters with defaults
model_id = kwargs.get('model_id', 'meta-llama/Llama-2-7b-hf')
warmup_iterations = kwargs.get('warmup_iterations', 3)
measurement_iterations = kwargs.get('measurement_iterations', 5)
num_tokens_to_generate = kwargs.get('num_tokens_to_generate', 100)
include_sdpa_variants = kwargs.get('include_sdpa_variants', True)
device = kwargs.get('device', 'cuda')
torch_dtype = kwargs.get('torch_dtype', 'float16')
batch_size = kwargs.get('batch_size', 1)
commit_id = kwargs.get('commit_id', None)
model_id = kwargs.get("model_id", "meta-llama/Llama-2-7b-hf")
warmup_iterations = kwargs.get("warmup_iterations", 3)
measurement_iterations = kwargs.get("measurement_iterations", 5)
num_tokens_to_generate = kwargs.get("num_tokens_to_generate", 100)
include_sdpa_variants = kwargs.get("include_sdpa_variants", True)
device = kwargs.get("device", "cuda")
torch_dtype = kwargs.get("torch_dtype", "float16")
batch_size = kwargs.get("batch_size", 1)
commit_id = kwargs.get("commit_id")

logger.info(f"Starting LLaMA benchmark for model: {model_id}")
logger.info(f"Configuration: warmup={warmup_iterations}, measurement={measurement_iterations}, tokens={num_tokens_to_generate}")
logger.info(
f"Configuration: warmup={warmup_iterations}, measurement={measurement_iterations}, tokens={num_tokens_to_generate}"
)

try:
# Create benchmark instance
@@ -129,7 +138,7 @@ def run_llama(logger, output_dir, **kwargs):
include_sdpa_variants=include_sdpa_variants,
device=device,
torch_dtype=torch_dtype,
batch_size=batch_size
batch_size=batch_size,
)

logger.info(f"Created {len(scenarios)} benchmark scenarios")
@@ -143,7 +152,7 @@ def run_llama(logger, output_dir, **kwargs):
return None

# Save results
model_name = model_id.split('/')[-1] # Extract model name from ID
model_name = model_id.split("/")[-1] # Extract model name from ID
output_file = runner.save_results(model_name, results)

logger.info(f"LLaMA benchmark completed successfully. Results saved to: {output_file}")
@@ -152,5 +161,6 @@ def run_llama(logger, output_dir, **kwargs):
except Exception as e:
logger.error(f"LLaMA benchmark failed: {e}")
import traceback

logger.debug(traceback.format_exc())
raise
@@ -14,28 +14,26 @@

import gc
import json
import os
import subprocess
import sys
import time
import statistics
import threading
from abc import ABC, abstractmethod
from contextlib import nullcontext
from dataclasses import dataclass, field, asdict
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Union, TypedDict
import logging
import os
import statistics
import sys
import threading
import time
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass, field
from datetime import datetime
from typing import Any, Optional, TypedDict, Union

import gpustat
import numpy as np
import psutil
import gpustat

import torch

class GPUMetrics(TypedDict):
"""GPU monitoring result with GPU metrics."""

gpu_utilization_mean: float
gpu_utilization_max: float
gpu_utilization_min: float
@@ -48,6 +46,7 @@ class GPUMetrics(TypedDict):

class NoGPU(TypedDict):
"""GPU monitoring result without GPU metrics."""

gpu_monitoring_status: str
gpu_monitoring_reason: str

@@ -134,6 +133,7 @@ class ArchAwareTimer:
@dataclass
class BenchmarkConfig:
"""Configuration for a single benchmark scenario."""

name: str
model_id: str
variant: str = "eager" # "eager", "compiled", "kernelized"
@@ -143,13 +143,13 @@ class BenchmarkConfig:
device: str = "cuda"
torch_dtype: str = "float16"
compile_mode: Optional[str] = None # None, "default", "reduce-overhead", "max-autotune"
compile_options: Dict[str, Any] = field(default_factory=dict)
compile_options: dict[str, Any] = field(default_factory=dict)
use_cache: bool = True
batch_size: int = 1
sequence_length: Optional[int] = None
attn_implementation: str = "sdpa" # "eager", "sdpa", "flash_attention_2"
sdpa_backend: Optional[str] = None # None, "math", "flash_attention", "efficient_attention", "cudnn_attention"
custom_params: Dict[str, Any] = field(default_factory=dict)
custom_params: dict[str, Any] = field(default_factory=dict)

class BenchmarkScenario:
@@ -195,24 +195,24 @@ class BenchmarkScenario:
return f"BenchmarkScenario(name='{self.name}', variant='{self.config.variant}')"

@dataclass
class TimingResult:
"""Result from a timing measurement."""

time_to_first_token_seconds: Optional[float] = None
latency_seconds: float = 0.0
tokens_per_second: Optional[float] = None
time_per_output_token_seconds: Optional[float] = None
total_tokens_generated: int = 0
metadata: Dict[str, Any] = field(default_factory=dict)
metadata: dict[str, Any] = field(default_factory=dict)

@dataclass
class BenchmarkStatistics:
"""Statistical analysis of benchmark measurements."""

name: str
measurements: List[float]
measurements: list[float]
mean: float
median: float
std: float
@@ -226,7 +226,7 @@ class BenchmarkStatistics:
unit: str = "seconds"

@classmethod
def from_measurements(cls, name: str, measurements: List[float], unit: str = "seconds") -> 'BenchmarkStatistics':
def from_measurements(cls, name: str, measurements: list[float], unit: str = "seconds") -> "BenchmarkStatistics":
"""Create statistics from a list of measurements."""
if not measurements:
raise ValueError("Cannot create statistics from empty measurements")
@@ -246,13 +246,14 @@ class BenchmarkStatistics:
p90=float(np.percentile(measurements_array, 90)),
p95=float(np.percentile(measurements_array, 95)),
p99=float(np.percentile(measurements_array, 99)),
unit=unit
unit=unit,
)

@dataclass
class HardwareInfo:
"""Hardware information collected during benchmarking."""

gpu_name: str
gpu_memory_total_mb: int
cpu_count: int
@@ -265,6 +266,7 @@ class HardwareInfo:
@dataclass
class BenchmarkMetadata:
"""Metadata collected for each benchmark run."""

timestamp: str
commit_id: str
hardware_info: HardwareInfo
@@ -274,7 +276,7 @@ class BenchmarkMetadata:
class GPUMonitor:
"""Monitor GPU utilization during benchmark execution."""

def __init__(self, sample_interval: float = 0.1, logger: logging.Logger = None):
def __init__(self, sample_interval: float = 0.1, logger: Optional[logging.Logger] = None):
self.sample_interval = sample_interval
self.logger = logger or logging.getLogger(__name__)
self.stop_event = threading.Event()
@@ -321,10 +323,7 @@ class GPUMonitor:
def stop_and_collect(self) -> Union[GPUMetrics, NoGPU]:
"""Stop monitoring and return collected metrics."""
if not self.gpu_available:
return NoGPU(
gpu_monitoring_status="disabled",
gpu_monitoring_reason="no_gpus_available"
)
return NoGPU(gpu_monitoring_status="disabled", gpu_monitoring_reason="no_gpus_available")

# Signal the monitoring thread to stop
self.stop_event.set()
@@ -340,15 +339,12 @@ class GPUMonitor:
gpu_memory_used_max=max(self.gpu_memory_used),
gpu_memory_used_min=min(self.gpu_memory_used),
sample_count=len(self.gpu_utilization),
gpu_monitoring_status="success"
gpu_monitoring_status="success",
)
self.logger.debug(f"GPU monitoring completed: {len(self.gpu_utilization)} samples collected")
return metrics
else:
return NoGPU(
gpu_monitoring_status="failed",
gpu_monitoring_reason="no_samples_collected"
)
return NoGPU(gpu_monitoring_status="failed", gpu_monitoring_reason="no_samples_collected")

def _monitor_loop(self):
"""Background monitoring loop using threading.Event for communication."""
@@ -400,7 +396,7 @@ def get_hardware_info() -> HardwareInfo:

torch_version = torch.__version__
cuda_version = None
if hasattr(torch, 'cuda') and torch.cuda.is_available():
if hasattr(torch, "cuda") and torch.cuda.is_available():
cuda_version = torch.version.cuda

return HardwareInfo(
@@ -410,14 +406,14 @@ def get_hardware_info() -> HardwareInfo:
memory_total_mb=int(psutil.virtual_memory().total / (1024 * 1024)),
python_version=f"{sys.version.split()[0]}",
torch_version=torch_version,
cuda_version=cuda_version
cuda_version=cuda_version,
)

def flush_memory():
"""Flush GPU memory and run garbage collection."""
gc.collect()
if hasattr(torch, 'cuda') and torch.cuda.is_available():
if hasattr(torch, "cuda") and torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
@@ -442,13 +438,10 @@ def get_sdpa_backend(backend_name: Optional[str]):
return None

class SDPAContext:
"""Context manager for SDPA kernel selection."""

def __init__(self, backend_name: Optional[str], logger: logging.Logger = None):
def __init__(self, backend_name: Optional[str], logger: Optional[logging.Logger] = None):
self.backend_name = backend_name
self.logger = logger or logging.getLogger(__name__)
self.backend = get_sdpa_backend(backend_name) if backend_name else None
@@ -466,7 +459,9 @@ class SDPAContext:
self.logger.warning(f"Failed to set SDPA backend {self.backend_name}: {e}")
self.context = None
elif self.backend_name and self.logger:
self.logger.debug(f"SDPA backend '{self.backend_name}' requested but not using kernel context (backend={self.backend})")
self.logger.debug(
f"SDPA backend '{self.backend_name}' requested but not using kernel context (backend={self.backend})"
)
return self

def __exit__(self, exc_type, exc_val, exc_tb):
@@ -490,7 +485,7 @@ class AbstractModelBenchmark(ABC):
self.scenarios = {} # Map of scenario_name -> BenchmarkScenario

@abstractmethod
def create_scenarios(self, **kwargs) -> Dict[str, 'BenchmarkScenario']:
def create_scenarios(self, **kwargs) -> dict[str, "BenchmarkScenario"]:
"""Create and return a dictionary of benchmark scenarios."""
pass

@@ -518,7 +513,7 @@ class AbstractModelBenchmark(ABC):
"""Prepare inputs for the model. Override if needed."""
return None

def get_scenarios(self, **kwargs) -> Dict[str, 'BenchmarkScenario']:
def get_scenarios(self, **kwargs) -> dict[str, "BenchmarkScenario"]:
"""Get benchmark scenarios. Creates them if they don't exist."""
if not self.scenarios:
self.scenarios = self.create_scenarios(**kwargs)
@@ -547,9 +542,7 @@ class ModelBenchmark(AbstractModelBenchmark):
"""Default prompt for text generation. Override in subclasses if needed."""
return self._default_prompt

def get_attention_configs(self, include_sdpa_variants: bool = True) -> List[Dict[str, Any]]:
def get_attention_configs(self, include_sdpa_variants: bool = True) -> list[dict[str, Any]]:
"""
Get attention implementation configurations.

@@ -565,15 +558,17 @@ class ModelBenchmark(AbstractModelBenchmark):

# Add SDPA variants if requested
if include_sdpa_variants:
attention_configs.append({
"attn_implementation": "sdpa",
"sdpa_backends": [None, "math", "flash_attention", "efficient_attention"],
"desc_suffix": ""
})
attention_configs.append(
{
"attn_implementation": "sdpa",
"sdpa_backends": [None, "math", "flash_attention", "efficient_attention"],
"desc_suffix": "",
}
)

return attention_configs

def get_scenario_configs(self) -> List[Dict[str, Any]]:
def get_scenario_configs(self) -> list[dict[str, Any]]:
"""
Get base scenario configurations. Override in subclasses to customize.

@@ -583,36 +578,38 @@ class ModelBenchmark(AbstractModelBenchmark):
return [
# Eager variants
{"variant": "eager", "compile_mode": None, "use_cache": True, "description": "Eager execution with cache"},

# Compiled variants
{"variant": "compiled", "compile_mode": "max-autotune", "use_cache": True, "description": "Compiled with max autotune"},

{
"variant": "compiled",
"compile_mode": "max-autotune",
"use_cache": True,
"description": "Compiled with max autotune",
},
# Kernelized variant (if available)
{"variant": "kernelized", "compile_mode": "max-autotune", "use_cache": True, "description": "Kernelized execution"},
{
"variant": "kernelized",
"compile_mode": "max-autotune",
"use_cache": True,
"description": "Kernelized execution",
},
]

def _is_kernelization_available(self) -> bool:
"""Check if kernelization is available. Override in subclasses."""
try:
from kernels import Mode, kernelize
from kernels import Mode, kernelize # noqa: F401

return True
except ImportError:
return False

def get_default_generation_config(self) -> Dict[str, Any]:
def get_default_generation_config(self) -> dict[str, Any]:
"""Get default generation configuration. Override in subclasses for model-specific defaults."""
return {
"do_sample": False,
"top_p": 1.0,
"temperature": 1.0
}
return {"do_sample": False, "top_p": 1.0, "temperature": 1.0}

def get_model_init_kwargs(self, config: BenchmarkConfig) -> Dict[str, Any]:
def get_model_init_kwargs(self, config: BenchmarkConfig) -> dict[str, Any]:
"""Get model initialization kwargs. Override in subclasses for model-specific parameters."""
return {
"torch_dtype": getattr(torch, config.torch_dtype),
"attn_implementation": config.attn_implementation
}
return {"torch_dtype": getattr(torch, config.torch_dtype), "attn_implementation": config.attn_implementation}

def get_default_torch_dtype(self) -> str:
"""Get default torch dtype. Override in subclasses."""
@@ -622,19 +619,19 @@ class ModelBenchmark(AbstractModelBenchmark):
"""Get default device. Override in subclasses."""
return "cuda"

def create_scenarios(self, **kwargs) -> Dict[str, 'BenchmarkScenario']:
def create_scenarios(self, **kwargs) -> dict[str, "BenchmarkScenario"]:
"""Create benchmark scenarios for HuggingFace models."""
scenarios = {}

# Extract parameters with model-specific defaults
model_id = kwargs.get('model_id', 'microsoft/DialoGPT-medium')
warmup_iterations = kwargs.get('warmup_iterations', 3)
measurement_iterations = kwargs.get('measurement_iterations', 5)
num_tokens_to_generate = kwargs.get('num_tokens_to_generate', 100)
include_sdpa_variants = kwargs.get('include_sdpa_variants', True)
device = kwargs.get('device', self.get_default_device())
torch_dtype = kwargs.get('torch_dtype', self.get_default_torch_dtype())
batch_size = kwargs.get('batch_size', 1)
model_id = kwargs.get("model_id", "microsoft/DialoGPT-medium")
warmup_iterations = kwargs.get("warmup_iterations", 3)
measurement_iterations = kwargs.get("measurement_iterations", 5)
num_tokens_to_generate = kwargs.get("num_tokens_to_generate", 100)
include_sdpa_variants = kwargs.get("include_sdpa_variants", True)
device = kwargs.get("device", self.get_default_device())
torch_dtype = kwargs.get("torch_dtype", self.get_default_torch_dtype())
batch_size = kwargs.get("batch_size", 1)

# Get configurations
attention_configs = self.get_attention_configs(include_sdpa_variants)
@@ -654,7 +651,7 @@ class ModelBenchmark(AbstractModelBenchmark):

# Create unique config for this scenario
config = BenchmarkConfig(
name=scenario_config['variant'],
name=scenario_config["variant"],
model_id=model_id,
variant=scenario_config["variant"],
compile_mode=scenario_config["compile_mode"],
@@ -666,7 +663,7 @@ class ModelBenchmark(AbstractModelBenchmark):
torch_dtype=torch_dtype,
batch_size=batch_size,
attn_implementation=attn_implementation,
sdpa_backend=sdpa_backend if attn_implementation == "sdpa" else None
sdpa_backend=sdpa_backend if attn_implementation == "sdpa" else None,
)

# Create scenario name
@@ -695,11 +692,7 @@ class ModelBenchmark(AbstractModelBenchmark):
description += desc_suffix

# Create scenario
scenario = BenchmarkScenario(
name=scenario_name,
config=config,
description=description
)
scenario = BenchmarkScenario(name=scenario_name, config=config, description=description)

# Add setup callbacks based on variant
if scenario_config["variant"] == "compiled":
@@ -718,16 +711,12 @@ class ModelBenchmark(AbstractModelBenchmark):

# Perform torch.compile
if config.compile_mode is not None:
self.compiled_model = torch.compile(
model,
mode=config.compile_mode,
**config.compile_options
)
self.compiled_model = torch.compile(model, mode=config.compile_mode, **config.compile_options)
else:
self.compiled_model = torch.compile(model, **config.compile_options)

# Setup static cache for compiled mode if needed
if config.use_cache and hasattr(self, 'inputs') and self.inputs is not None:
if config.use_cache and hasattr(self, "inputs") and self.inputs is not None:
self._setup_static_cache(config)

def _setup_kernelization_callback(self, model, tokenizer, config, logger):
@@ -737,10 +726,8 @@ class ModelBenchmark(AbstractModelBenchmark):

try:
from kernels import Mode, kernelize
self.compiled_model = kernelize(
model,
mode=Mode.INFERENCE
)

self.compiled_model = kernelize(model, mode=Mode.INFERENCE)
except Exception as e:
if logger:
logger.warning(f"Failed to setup kernelized mode: {e}")
@@ -749,13 +736,14 @@ class ModelBenchmark(AbstractModelBenchmark):

def _setup_static_cache(self, config: BenchmarkConfig):
"""Setup static cache for compiled models. Override if needed."""
if hasattr(self, 'inputs') and self.inputs is not None:
if hasattr(self, "inputs") and self.inputs is not None:
try:
from transformers import StaticCache

seq_length = self.inputs["input_ids"].shape[1]

# Get the actual device the model is on
if hasattr(self.model, 'device'):
if hasattr(self.model, "device"):
cache_device = self.model.device
else:
cache_device = self.device
@@ -765,7 +753,7 @@ class ModelBenchmark(AbstractModelBenchmark):
max_batch_size=config.batch_size,
max_cache_len=seq_length + config.num_tokens_to_generate,
device=cache_device,
dtype=getattr(torch, config.torch_dtype)
dtype=getattr(torch, config.torch_dtype),
)
self.logger.debug(f"StaticCache created on device: {cache_device}")
except (ImportError, TypeError) as e:
@@ -794,7 +782,6 @@ class ModelBenchmark(AbstractModelBenchmark):
def _load_model_and_tokenizer(self, config: BenchmarkConfig):
"""Load the model and tokenizer. Override in subclasses for custom loading."""

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load tokenizer
@@ -812,14 +799,9 @@ class ModelBenchmark(AbstractModelBenchmark):
target_device = config.device
# Get model initialization kwargs
model_init_kwargs = self.get_model_init_kwargs(config)
model_init_kwargs.update({
"generation_config": gen_config
})
model_init_kwargs.update({"generation_config": gen_config})

self.model = AutoModelForCausalLM.from_pretrained(
config.model_id,
**model_init_kwargs
).eval()
self.model = AutoModelForCausalLM.from_pretrained(config.model_id, **model_init_kwargs).eval()

# Move model to target device
self.logger.info(f"Moving model to device: {target_device}")
@@ -832,7 +814,7 @@ class ModelBenchmark(AbstractModelBenchmark):
self.inputs = self.tokenizer(self.default_prompt, return_tensors="pt")

# Move inputs to the same device as the model
if hasattr(self.model, 'device'):
if hasattr(self.model, "device"):
# Model is on a single device
model_device = self.model.device
else:
@@ -849,16 +831,16 @@ class ModelBenchmark(AbstractModelBenchmark):

def cleanup_model(self) -> None:
"""Cleanup model resources."""
if hasattr(self, 'model') and self.model is not None:
if hasattr(self, "model") and self.model is not None:
del self.model
self.model = None
if hasattr(self, 'compiled_model') and self.compiled_model is not None:
if hasattr(self, "compiled_model") and self.compiled_model is not None:
del self.compiled_model
self.compiled_model = None
if hasattr(self, 'tokenizer') and self.tokenizer is not None:
if hasattr(self, "tokenizer") and self.tokenizer is not None:
del self.tokenizer
self.tokenizer = None
if hasattr(self, 'past_key_values') and self.past_key_values is not None:
if hasattr(self, "past_key_values") and self.past_key_values is not None:
del self.past_key_values
self.past_key_values = None

@@ -877,7 +859,7 @@ class ModelBenchmark(AbstractModelBenchmark):
# Use SDPA context if specified
with SDPAContext(config.sdpa_backend, self.logger):
with torch.no_grad():
outputs = model_to_use.generate(**generation_kwargs)
_ = model_to_use.generate(**generation_kwargs)

return timer.elapsed_time()

@@ -915,11 +897,11 @@ class ModelBenchmark(AbstractModelBenchmark):
"variant": config.variant,
"compile_mode": config.compile_mode,
"attn_implementation": config.attn_implementation,
"sdpa_backend": config.sdpa_backend
}
"sdpa_backend": config.sdpa_backend,
},
)

def _get_generation_kwargs(self, config: BenchmarkConfig, max_new_tokens: int) -> Dict[str, Any]:
def _get_generation_kwargs(self, config: BenchmarkConfig, max_new_tokens: int) -> dict[str, Any]:
"""Get generation kwargs. Override in subclasses for custom generation."""
generation_config_dict = self.get_default_generation_config()
generation_kwargs = {
@@ -935,11 +917,12 @@ class ModelBenchmark(AbstractModelBenchmark):
if self.past_key_values is not None and config.variant == "compiled":
try:
from transformers import StaticCache

# Reset cache for each measurement
seq_length = self.inputs["input_ids"].shape[1]

# Get the actual device the model is on
if hasattr(self.model, 'device'):
if hasattr(self.model, "device"):
cache_device = self.model.device
else:
cache_device = self.device
@@ -949,7 +932,7 @@ class ModelBenchmark(AbstractModelBenchmark):
max_batch_size=config.batch_size,
max_cache_len=seq_length + max_new_tokens,
device=cache_device,
dtype=getattr(torch, config.torch_dtype)
dtype=getattr(torch, config.torch_dtype),
)
generation_kwargs["past_key_values"] = fresh_cache
except (ImportError, TypeError) as e:
@@ -967,14 +950,13 @@ class BenchmarkRunner:
self.output_dir = output_dir
os.makedirs(output_dir, exist_ok=True)

def run_benchmark(
self,
benchmark: ModelBenchmark,
scenarios: Dict[str, BenchmarkScenario],
scenarios: dict[str, BenchmarkScenario],
collect_gpu_metrics: bool = True,
commit_id: Optional[str] = None
) -> Dict[str, Dict[str, Any]]:
commit_id: Optional[str] = None,
) -> dict[str, dict[str, Any]]:
"""
Run benchmarks using scenarios.

@@ -1021,7 +1003,7 @@ class BenchmarkRunner:
timestamp=datetime.utcnow().isoformat(),
commit_id=commit_id,
hardware_info=get_hardware_info(),
config=config
config=config,
)

# Initialize GPU monitor
@@ -1037,11 +1019,13 @@ class BenchmarkRunner:
_ = benchmark.measure_latency(config)
except Exception as e:
warmup_failures += 1
self.logger.warning(f"Warmup iteration {i+1} failed: {e}")
self.logger.warning(f"Warmup iteration {i + 1} failed: {e}")

# If more than half the warmup iterations failed, skip this scenario
if warmup_failures > config.warmup_iterations // 2:
self.logger.warning(f"Skipping scenario {scenario_name}: too many warmup failures ({warmup_failures}/{config.warmup_iterations})")
self.logger.warning(
f"Skipping scenario {scenario_name}: too many warmup failures ({warmup_failures}/{config.warmup_iterations})"
)
try:
scenario.teardown(benchmark.model, benchmark.tokenizer, self.logger)
benchmark.cleanup_model()
@@ -1077,12 +1061,18 @@ class BenchmarkRunner:
if timing_result.time_per_output_token_seconds is not None:
itl_measurements.append(timing_result.time_per_output_token_seconds)

itl_str = f", itl={timing_result.time_per_output_token_seconds:.4f}s/token" if timing_result.time_per_output_token_seconds else ""
self.logger.debug(f"Iteration {i+1}: latency={timing_result.latency_seconds:.4f}s, ttft={ttft:.4f}s{itl_str}")
itl_str = (
f", itl={timing_result.time_per_output_token_seconds:.4f}s/token"
if timing_result.time_per_output_token_seconds
else ""
)
self.logger.debug(
f"Iteration {i + 1}: latency={timing_result.latency_seconds:.4f}s, ttft={ttft:.4f}s{itl_str}"
)

except Exception as e:
measurement_failures += 1
self.logger.warning(f"Measurement iteration {i+1} failed: {e}")
self.logger.warning(f"Measurement iteration {i + 1} failed: {e}")

# Stop GPU monitoring
gpu_metrics = {}
@@ -1091,7 +1081,9 @@ class BenchmarkRunner:

# If we don't have enough successful measurements, skip this scenario
if not latency_measurements or len(latency_measurements) < config.measurement_iterations // 2:
self.logger.warning(f"Skipping scenario {scenario_name}: insufficient successful measurements ({len(latency_measurements)}/{config.measurement_iterations})")
self.logger.warning(
f"Skipping scenario {scenario_name}: insufficient successful measurements ({len(latency_measurements)}/{config.measurement_iterations})"
)
try:
scenario.teardown(benchmark.model, benchmark.tokenizer, self.logger)
benchmark.cleanup_model()
@@ -1104,7 +1096,7 @@ class BenchmarkRunner:
"metadata": asdict(metadata),
"measurements": {},
"gpu_metrics": gpu_metrics,
"scenario_description": scenario.description
"scenario_description": scenario.description,
}

if latency_measurements:
@@ -1112,15 +1104,21 @@ class BenchmarkRunner:
scenario_results["measurements"]["latency_seconds"] = asdict(latency_stats)

if ttft_measurements:
ttft_stats = BenchmarkStatistics.from_measurements("time_to_first_token_seconds", ttft_measurements)
ttft_stats = BenchmarkStatistics.from_measurements(
"time_to_first_token_seconds", ttft_measurements
)
scenario_results["measurements"]["time_to_first_token_seconds"] = asdict(ttft_stats)

if tokens_per_sec_measurements:
tps_stats = BenchmarkStatistics.from_measurements("tokens_per_second", tokens_per_sec_measurements, "tokens/sec")
tps_stats = BenchmarkStatistics.from_measurements(
"tokens_per_second", tokens_per_sec_measurements, "tokens/sec"
)
scenario_results["measurements"]["tokens_per_second"] = asdict(tps_stats)

if itl_measurements:
itl_stats = BenchmarkStatistics.from_measurements("time_per_output_token_seconds", itl_measurements, "seconds/token")
itl_stats = BenchmarkStatistics.from_measurements(
"time_per_output_token_seconds", itl_measurements, "seconds/token"
)
scenario_results["measurements"]["time_per_output_token_seconds"] = asdict(itl_stats)

# Log summary
@@ -1149,6 +1147,7 @@ class BenchmarkRunner:
except Exception as e:
self.logger.warning(f"Skipping scenario {scenario_name}: setup failed - {e}")
import traceback

self.logger.debug(traceback.format_exc())

# Try to clean up if possible
@@ -1169,7 +1168,7 @@ class BenchmarkRunner:

return all_results

def save_results(self, model_name: str, results: Dict[str, Dict[str, Any]]) -> str:
def save_results(self, model_name: str, results: dict[str, dict[str, Any]]) -> str:
"""Save benchmark results to JSON file."""
# Create model-specific subdirectory
model_dir = os.path.join(self.output_dir, model_name)
@@ -1181,24 +1180,20 @@ class BenchmarkRunner:
filepath = os.path.join(model_dir, filename)

# Prepare output structure
output_data = {
"model_name": model_name,
"benchmark_scenarios": []
}
output_data = {"model_name": model_name, "benchmark_scenarios": []}

for config_name, config_results in results.items():
scenario = {
"scenario_name": config_name,
"metadata": config_results["metadata"],
"measurements": config_results["measurements"],
"gpu_metrics": config_results.get("gpu_metrics", {})
"gpu_metrics": config_results.get("gpu_metrics", {}),
}
output_data["benchmark_scenarios"].append(scenario)

# Save to JSON file
with open(filepath, 'w') as f:
with open(filepath, "w") as f:
json.dump(output_data, f, indent=2, default=str)

self.logger.info(f"Results saved to {filepath}")
return filepath
@@ -20,38 +20,34 @@ in the ./benches directory, organizing outputs into model-specific subfolders.

import argparse
import importlib.util
import json
import logging
import os
import sys
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional
from typing import Any, Optional

def setup_logging(log_level: str = "INFO", enable_file_logging: bool = False) -> logging.Logger:
"""Setup logging configuration."""
numeric_level = getattr(logging, log_level.upper(), None)
if not isinstance(numeric_level, int):
raise ValueError(f'Invalid log level: {log_level}')
raise ValueError(f"Invalid log level: {log_level}")

handlers = [logging.StreamHandler(sys.stdout)]

if enable_file_logging:
handlers.append(
logging.FileHandler(f'benchmark_run_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
)
handlers.append(logging.FileHandler(f"benchmark_run_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"))

logging.basicConfig(
level=numeric_level,
format='[%(levelname)s - %(asctime)s] %(name)s: %(message)s',
handlers=handlers
level=numeric_level, format="[%(levelname)s - %(asctime)s] %(name)s: %(message)s", handlers=handlers
)

return logging.getLogger(__name__)

def discover_benchmarks(benches_dir: str) -> List[Dict[str, Any]]:
def discover_benchmarks(benches_dir: str) -> list[dict[str, Any]]:
"""
Discover all benchmark modules in the benches directory.

@@ -77,20 +73,24 @@ def discover_benchmarks(benches_dir: str) -> List[Dict[str, Any]]:
spec.loader.exec_module(module)

# Check if it has a benchmark runner function
if hasattr(module, f'run_{module_name}'):
benchmarks.append({
'name': module_name,
'path': str(py_file),
'module': module,
'runner_function': getattr(module, f'run_{module_name}')
})
elif hasattr(module, 'run_benchmark'):
benchmarks.append({
'name': module_name,
'path': str(py_file),
'module': module,
'runner_function': getattr(module, 'run_benchmark')
})
if hasattr(module, f"run_{module_name}"):
benchmarks.append(
{
"name": module_name,
"path": str(py_file),
"module": module,
"runner_function": getattr(module, f"run_{module_name}"),
}
)
elif hasattr(module, "run_benchmark"):
benchmarks.append(
{
"name": module_name,
"path": str(py_file),
"module": module,
"runner_function": getattr(module, "run_benchmark"),
}
)
else:
logging.warning(f"No runner function found in {py_file}")
@@ -101,10 +101,7 @@ def discover_benchmarks(benches_dir: str) -> List[Dict[str, Any]]:

def run_single_benchmark(
benchmark_info: Dict[str, Any],
output_dir: str,
logger: logging.Logger,
**kwargs
benchmark_info: dict[str, Any], output_dir: str, logger: logging.Logger, **kwargs
) -> Optional[str]:
"""
Run a single benchmark and return the output file path.
@@ -118,21 +115,19 @@ def run_single_benchmark(
Returns:
Path to the output file if successful, None otherwise
"""
benchmark_name = benchmark_info['name']
runner_func = benchmark_info['runner_function']
benchmark_name = benchmark_info["name"]
runner_func = benchmark_info["runner_function"]

logger.info(f"Running benchmark: {benchmark_name}")

try:
# Check function signature to determine what arguments to pass
import inspect

sig = inspect.signature(runner_func)

# Prepare arguments based on function signature
func_kwargs = {
'logger': logger,
'output_dir': output_dir
}
func_kwargs = {"logger": logger, "output_dir": output_dir}

# Add other kwargs if the function accepts them
for param_name in sig.parameters:
@@ -145,8 +140,7 @@ def run_single_benchmark(
if has_var_kwargs:
valid_kwargs = {**func_kwargs, **kwargs}
else:
valid_kwargs = {k: v for k, v in func_kwargs.items()
if k in sig.parameters}
valid_kwargs = {k: v for k, v in func_kwargs.items() if k in sig.parameters}

# Run the benchmark
result = runner_func(**valid_kwargs)
@@ -161,15 +155,12 @@ def run_single_benchmark(
except Exception as e:
logger.error(f"Benchmark {benchmark_name} failed: {e}")
import traceback

logger.debug(traceback.format_exc())
return None

def generate_summary_report(
output_dir: str,
benchmark_results: Dict[str, Any],
logger: logging.Logger
) -> str:
def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any], logger: logging.Logger) -> str:
"""Generate a summary report of all benchmark runs."""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.json")
@@ -179,13 +170,13 @@ def generate_summary_report(
"timestamp": datetime.utcnow().isoformat(),
"total_benchmarks": len(benchmark_results),
"successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]),
"failed_benchmarks": len([r for r in benchmark_results.values() if r is None])
"failed_benchmarks": len([r for r in benchmark_results.values() if r is None]),
},
"benchmark_results": benchmark_results,
"output_directory": output_dir
"output_directory": output_dir,
}

with open(summary_file, 'w') as f:
with open(summary_file, "w") as f:
json.dump(summary_data, f, indent=2, default=str)

logger.info(f"Summary report saved to: {summary_file}")
@@ -194,22 +185,20 @@ def generate_summary_report(

def main():
"""Main entry point for the benchmarking script."""
parser = argparse.ArgumentParser(
description="Run all benchmarks in the ./benches directory"
)
parser = argparse.ArgumentParser(description="Run all benchmarks in the ./benches directory")

parser.add_argument(
"--output-dir",
type=str,
default="benchmark_results",
help="Base output directory for benchmark results (default: benchmark_results)"
help="Base output directory for benchmark results (default: benchmark_results)",
)

parser.add_argument(
"--benches-dir",
type=str,
default="./benches",
help="Directory containing benchmark implementations (default: ./benches)"
help="Directory containing benchmark implementations (default: ./benches)",
)

parser.add_argument(
@@ -217,66 +206,34 @@ def main():
type=str,
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
default="INFO",
help="Logging level (default: INFO)"
help="Logging level (default: INFO)",
)

parser.add_argument(
"--model-id",
type=str,
help="Specific model ID to benchmark (if supported by benchmarks)"
)
parser.add_argument("--model-id", type=str, help="Specific model ID to benchmark (if supported by benchmarks)")

parser.add_argument("--warmup-iterations", type=int, default=3, help="Number of warmup iterations (default: 3)")

parser.add_argument(
"--warmup-iterations",
type=int,
default=3,
help="Number of warmup iterations (default: 3)"
)

parser.add_argument(
"--measurement-iterations",
type=int,
default=5,
help="Number of measurement iterations (default: 5)"
"--measurement-iterations", type=int, default=5, help="Number of measurement iterations (default: 5)"
)

parser.add_argument(
"--num-tokens-to-generate",
type=int,
default=100,
help="Number of tokens to generate in benchmarks (default: 100)"
help="Number of tokens to generate in benchmarks (default: 100)",
)

parser.add_argument(
"--include",
type=str,
nargs="*",
help="Only run benchmarks matching these names"
)
parser.add_argument("--include", type=str, nargs="*", help="Only run benchmarks matching these names")

parser.add_argument("--exclude", type=str, nargs="*", help="Exclude benchmarks matching these names")

parser.add_argument("--enable-mock", action="store_true", help="Enable mock benchmark (skipped by default)")

parser.add_argument("--enable-file-logging", action="store_true", help="Enable file logging (disabled by default)")

parser.add_argument(
"--exclude",
type=str,
nargs="*",
help="Exclude benchmarks matching these names"
)

parser.add_argument(
"--enable-mock",
action="store_true",
help="Enable mock benchmark (skipped by default)"
)

parser.add_argument(
"--enable-file-logging",
action="store_true",
help="Enable file logging (disabled by default)"
)

parser.add_argument(
"--commit-id",
type=str,
help="Git commit ID for metadata (if not provided, will auto-detect from git)"
"--commit-id", type=str, help="Git commit ID for metadata (if not provided, will auto-detect from git)"
)

args = parser.parse_args()
@@ -304,13 +261,15 @@ def main():
filtered_benchmarks = benchmarks

if args.include:
filtered_benchmarks = [b for b in filtered_benchmarks
if any(pattern in b['name'] for pattern in args.include)]
filtered_benchmarks = [
b for b in filtered_benchmarks if any(pattern in b["name"] for pattern in args.include)
]
logger.info(f"Filtered to include: {[b['name'] for b in filtered_benchmarks]}")

if args.exclude:
filtered_benchmarks = [b for b in filtered_benchmarks
if not any(pattern in b['name'] for pattern in args.exclude)]
filtered_benchmarks = [
b for b in filtered_benchmarks if not any(pattern in b["name"] for pattern in args.exclude)
|
||||
]
|
||||
logger.info(f"After exclusion: {[b['name'] for b in filtered_benchmarks]}")
|
||||
|
||||
if not filtered_benchmarks:
|
||||
@ -319,34 +278,29 @@ def main():
|
||||
|
||||
# Prepare common kwargs for benchmarks
|
||||
benchmark_kwargs = {
|
||||
'warmup_iterations': args.warmup_iterations,
|
||||
'measurement_iterations': args.measurement_iterations,
|
||||
'num_tokens_to_generate': args.num_tokens_to_generate
|
||||
"warmup_iterations": args.warmup_iterations,
|
||||
"measurement_iterations": args.measurement_iterations,
|
||||
"num_tokens_to_generate": args.num_tokens_to_generate,
|
||||
}
|
||||
|
||||
if args.model_id:
|
||||
benchmark_kwargs['model_id'] = args.model_id
|
||||
benchmark_kwargs["model_id"] = args.model_id
|
||||
|
||||
# Add enable_mock flag for mock benchmark
|
||||
benchmark_kwargs['enable_mock'] = args.enable_mock
|
||||
benchmark_kwargs["enable_mock"] = args.enable_mock
|
||||
|
||||
# Add commit_id if provided
|
||||
if args.commit_id:
|
||||
benchmark_kwargs['commit_id'] = args.commit_id
|
||||
benchmark_kwargs["commit_id"] = args.commit_id
|
||||
|
||||
# Run benchmarks
|
||||
benchmark_results = {}
|
||||
successful_count = 0
|
||||
|
||||
for benchmark_info in filtered_benchmarks:
|
||||
result = run_single_benchmark(
|
||||
benchmark_info,
|
||||
args.output_dir,
|
||||
logger,
|
||||
**benchmark_kwargs
|
||||
)
|
||||
result = run_single_benchmark(benchmark_info, args.output_dir, logger, **benchmark_kwargs)
|
||||
|
||||
benchmark_results[benchmark_info['name']] = result
|
||||
benchmark_results[benchmark_info["name"]] = result
|
||||
|
||||
if result is not None:
|
||||
successful_count += 1
|
||||
@ -377,6 +331,7 @@ def main():
|
||||
except Exception as e:
|
||||
logger.error(f"Benchmark run failed: {e}")
|
||||
import traceback
|
||||
|
||||
logger.debug(traceback.format_exc())
|
||||
return 1
|
||||
|
||||
|
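The kwargs filtering that ruff reformats in the run_single_benchmark hunks above is easier to follow outside a diff. Below is a minimal standalone sketch of the same inspect.signature pattern; example_runner, the sample kwargs, and the way has_var_kwargs is derived are illustrative assumptions, not code from this commit.

import inspect
import logging

logging.basicConfig(level=logging.INFO)


def example_runner(logger, output_dir, warmup_iterations=3, **kwargs):
    """Stand-in benchmark runner used only for this sketch."""
    logger.info(f"benchmarking into {output_dir} (warmup={warmup_iterations}, extra={kwargs})")


func_kwargs = {"logger": logging.getLogger("bench"), "output_dir": "/tmp/results", "model_id": "demo"}
sig = inspect.signature(example_runner)
# Assumed derivation: the runner accepts everything when it declares a **kwargs catch-all,
# otherwise only the parameters it names are forwarded.
has_var_kwargs = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values())
if has_var_kwargs:
    valid_kwargs = {**func_kwargs}
else:
    valid_kwargs = {k: v for k, v in func_kwargs.items() if k in sig.parameters}
example_runner(**valid_kwargs)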
@ -4,8 +4,8 @@ import datasets

import transformers
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS
from transformers.utils import logging
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.utils import logging


logging.set_verbosity_info()
@ -22,7 +22,9 @@ imperfect = 0
wrong = 0


def check_diff(spm_diff: list[int], tok_diff: list[int], slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizerBase) -> bool:
def check_diff(
spm_diff: list[int], tok_diff: list[int], slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizerBase
) -> bool:
if spm_diff == list(reversed(tok_diff)):
# AAA -> AA+A vs A+AA case.
return True
@ -54,7 +56,9 @@ def check_LTR_mark(line: str, idx: int, fast: PreTrainedTokenizerBase) -> bool:
return False


def check_details(line: str, spm_ids: list[int], tok_ids: list[int], slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizerBase) -> bool:
def check_details(
line: str, spm_ids: list[int], tok_ids: list[int], slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizerBase
) -> bool:
# Encoding can be the same with same result AAA -> A + AA vs AA + A
# We can check that we use at least exactly the same number of tokens.
for i, (spm_id, tok_id) in enumerate(zip(spm_ids, tok_ids)):
@ -90,7 +94,9 @@ def check_details(line: str, spm_ids: list[int], tok_ids: list[int], slow: PreTr
if tok_ids[first + k : first + k + min_width] == spm_ids[first + i : first + i + min_width]
]
for j in possible_matches:
if check_diff(spm_ids[first : first + i], tok_ids[first : first + j], slow, fast) and check_details(
if check_diff(
spm_ids[first : first + i], tok_ids[first : first + j], slow, fast
) and check_details(
line,
spm_ids[first + i : last],
tok_ids[first + j : last],
@ -140,9 +146,9 @@ def test_string(slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizerBase, te
if skip_assert:
return

assert (
slow_ids == fast_ids
), f"line {text} : \n\n{slow_ids}\n{fast_ids}\n\n{slow.tokenize(text)}\n{fast.tokenize(text)}"
assert slow_ids == fast_ids, (
f"line {text} : \n\n{slow_ids}\n{fast_ids}\n\n{slow.tokenize(text)}\n{fast.tokenize(text)}"
)


def test_tokenizer(slow: PreTrainedTokenizerBase, fast: PreTrainedTokenizerBase) -> None:

@ -15,6 +15,7 @@
Script to close stale issue. Taken in part from the AllenNLP repository.
https://github.com/allenai/allennlp.
"""

import os
from datetime import datetime as dt

@ -39,10 +40,11 @@ def main():

for i, issue in enumerate(open_issues):
print(i, issue)
comments = sorted(list(issue.get_comments()), key=lambda i: i.created_at, reverse=True)
comments = sorted(issue.get_comments(), key=lambda i: i.created_at, reverse=True)
last_comment = comments[0] if len(comments) > 0 else None
if (
last_comment is not None and last_comment.user.login == "github-actions[bot]"
last_comment is not None
and last_comment.user.login == "github-actions[bot]"
and (dt.utcnow() - issue.updated_at.replace(tzinfo=None)).days > 7
and (dt.utcnow() - issue.created_at.replace(tzinfo=None)).days >= 30
and not any(label.name.lower() in LABELS_TO_EXEMPT for label in issue.get_labels())
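One detail of the staleness condition above that the diff itself does not explain: issue.updated_at and issue.created_at appear to come back timezone-aware, while dt.utcnow() is naive, which is presumably why the script strips tzinfo before subtracting. A minimal sketch of that arithmetic, with an invented timestamp standing in for issue.updated_at:

from datetime import datetime as dt
from datetime import timezone

# Invented timestamp standing in for issue.updated_at (aware, UTC).
updated_at = dt(2025, 1, 1, 12, 0, tzinfo=timezone.utc)

# Subtracting an aware datetime from the naive dt.utcnow() would raise TypeError,
# so the aware value is made naive first, mirroring the check in the script above.
days_since_update = (dt.utcnow() - updated_at.replace(tzinfo=None)).days
print(days_since_update, days_since_update > 7)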