Add more documentation and improve usability of lognormal dist (benchmark_serving_multi_turn) (#23255)
Signed-off-by: daniels <daniels@pliops.com>
committed by GitHub · parent ca2d1925ef · commit 43a62c51be
@@ -55,6 +55,107 @@ output_num_chunks 166.0 99.01 11.80 79.00 90.00 98.00 108.75
----------------------------------------------------------------------------------------------------
```

### JSON configuration file for synthetic conversations generation

The `--input-file` flag determines the input conversations for the benchmark.<br/>
When the input is a JSON file with the field `"filetype": "generate_conversations"`, the tool will generate synthetic multi-turn (question and answer) conversations.

The file `generate_multi_turn.json` is an example of such a configuration file.

The file must contain the sections `prompt_input` and `prompt_output`.

The `prompt_input` section must contain `num_turns`, `prefix_num_tokens`, and `num_tokens`:

* `num_turns` - Total number of turns in the conversation (both user & assistant).<br/>
The final value will always be rounded to an even number so that each user turn has a reply.
* `prefix_num_tokens` - Tokens added at the start of only the **first user turn** in a conversation (unique per conversation).
* `num_tokens` - Total token length of each **user** message (one turn).

The `prompt_output` section must contain `num_tokens`:

* `num_tokens` - Total token length of each **assistant** message (one turn).
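
For illustration, the sketch below (Python; the output filename and all numeric values are arbitrary choices, not taken from the repository) assembles a configuration with the required sections and writes it to disk. A real configuration such as `generate_multi_turn.json` may contain additional fields.

```python
import json

# Hypothetical minimal config: every numeric field is given a distribution
# (the available distributions are described in the next section).
config = {
    "filetype": "generate_conversations",
    "prompt_input": {
        "num_turns": {"distribution": "uniform", "min": 4, "max": 10},
        "prefix_num_tokens": {"distribution": "lognormal", "average": 1000, "max": 5000},
        "num_tokens": {"distribution": "uniform", "min": 120, "max": 160},
    },
    "prompt_output": {
        "num_tokens": {"distribution": "lognormal", "average": 1000, "max": 5000},
    },
}

# Write the file and pass it to the benchmark via --input-file.
with open("my_generate_multi_turn.json", "w") as f:
    json.dump(config, f, indent=4)
```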

### Random distributions for synthetic conversations generation

When creating an input JSON file (such as `generate_multi_turn.json`),<br/>
every numeric field (such as `num_turns` or `num_tokens`) requires a distribution.<br/>
The distribution determines how to randomly sample values for the field.

The available distributions are listed below.

**Note:** The optional `max` field (for lognormal, zipf, and poisson) can be used to cap sampled values at an upper bound.<br/>
This helps ensure that the total number of tokens in every request does not exceed `--max-model-len`.

#### constant

```json
{
    "distribution": "constant",
    "value": 500
}
```

* `value` - the fixed integer value (always returns the same number).

#### uniform

```json
{
    "distribution": "uniform",
    "min": 12,
    "max": 18
}
```

* `min` - minimum value (inclusive).
* `max` - maximum value (inclusive); must be greater than or equal to `min`.
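
As a quick illustration of the inclusive bounds, here is a small NumPy sketch (not necessarily the benchmark's exact implementation):

```python
import numpy as np

# Inclusive uniform integers in [min, max]; numpy's randint excludes the upper
# bound, hence the +1. Values mirror the JSON example above.
low, high = 12, 18
samples = np.random.randint(low, high + 1, size=5)
print(samples)  # e.g. [14 12 18 16 13]
```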

#### lognormal

```json
{
    "distribution": "lognormal",
    "average": 1000,
    "max": 5000
}
```

You can parameterize the lognormal distribution in one of two ways:

Using the average and an optional median ratio:

* `average` - target average value of the distribution.
* `median_ratio` - the ratio of the median to the average; controls the skewness. Must be in the range (0, 1).

Using the parameters of the underlying normal distribution:

* `mean` - mean of the underlying normal distribution.
* `sigma` - standard deviation of the underlying normal distribution.
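
As a rough sketch of how the `average`/`median_ratio` form relates to the underlying normal parameters (using the relations mean = exp(mu + sigma^2 / 2) and median = exp(mu); the values below are the ones from the JSON example):

```python
import numpy as np

# Derive (mu, sigma) of the underlying normal distribution from a target
# average and a median/average ratio in (0, 1):
#   sigma^2 = 2 * ln(average / median),  mu = ln(median)
average, median_ratio = 1000, 0.85
median = average * median_ratio
sigma = np.sqrt(2 * np.log(average / median))
mu = np.log(median)

samples = np.random.lognormal(mean=mu, sigma=sigma, size=10_000)
print(samples.mean())  # close to 1000; the tool additionally rescales samples to hit the average exactly
```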

#### zipf

```json
{
    "distribution": "zipf",
    "alpha": 1.2,
    "max": 100
}
```

* `alpha` - skew parameter (> 1). Larger values produce stronger skew toward smaller integers.
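
A minimal NumPy sketch of a capped Zipf draw (illustrative only; the benchmark's own Zipf sampler is not shown in this diff):

```python
import numpy as np

# Zipf samples with skew parameter alpha > 1, capped at an upper bound.
alpha, max_val = 1.2, 100
samples = np.minimum(np.random.zipf(a=alpha, size=5), max_val)
print(samples)  # heavily skewed toward small integers, e.g. [1 1 3 1 27]
```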

#### poisson

```json
{
    "distribution": "poisson",
    "alpha": 10,
    "max": 50
}
```

* `alpha` - expected value (λ). Also the variance of the distribution.
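
And the equivalent sketch for a capped Poisson draw (again illustrative; `np.random.poisson` is used here for brevity):

```python
import numpy as np

# Poisson samples with expected value (and variance) alpha, capped at max.
alpha, max_val = 10, 50
samples = np.minimum(np.random.poisson(lam=alpha, size=5), max_val)
print(samples)  # e.g. [ 9 12  8 10 11]
```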

## ShareGPT Conversations

To run with the ShareGPT data, download the following ShareGPT dataset:

@@ -99,21 +99,105 @@ class PoissonDistribution(Distribution):

class LognormalDistribution(Distribution):
    def __init__(
-        self, mean: float, sigma: float, max_val: Optional[int] = None
+        self,
+        mean: Optional[float] = None,
+        sigma: Optional[float] = None,
+        average: Optional[int] = None,
+        median_ratio: Optional[float] = None,
+        max_val: Optional[int] = None,
    ) -> None:
        self.average = average
        self.median_ratio = median_ratio
        self.max_val = max_val

        if average is not None:
            if average < 1:
                raise ValueError("Lognormal average must be positive")

            if mean or sigma:
                raise ValueError(
                    "When using lognormal average, you can't provide mean/sigma"
                )

            if self.median_ratio is None:
                # Default value that provides relatively wide range of values
                self.median_ratio = 0.85

            # Calculate mean/sigma of np.random.lognormal based on the average
            mean, sigma = self._generate_lognormal_by_median(
                target_average=self.average, median_ratio=self.median_ratio
            )
        else:
            if mean is None or sigma is None:
                raise ValueError(
                    "Must provide both mean and sigma if average is not used"
                )

            if mean <= 0 or sigma < 0:
                raise ValueError(
                    "Lognormal mean must be positive and sigma must be non-negative"
                )

        # Mean and standard deviation of the underlying normal distribution
        # Based on numpy.random.lognormal
        self.mean = mean
        self.sigma = sigma
-        self.max_val = max_val

    @staticmethod
    def _generate_lognormal_by_median(
        target_average: int, median_ratio: float
    ) -> tuple[float, float]:
        """
        Compute (mu, sigma) for a lognormal distribution given:
        - a target average (mean of the distribution)
        - a ratio of median / mean (controls skewness), assume mean > median

        Background:
        If Z ~ Normal(mu, sigma^2), then X = exp(Z) ~ LogNormal(mu, sigma).
        * mean(X) = exp(mu + sigma^2 / 2)
        * median(X) = exp(mu)

        So:
        median / mean = exp(mu) / exp(mu + sigma^2 / 2)
                      = exp(-sigma^2 / 2)

        Rearranging:
        sigma^2 = 2 * ln(mean / median)
        mu = ln(median)

        This gives a unique (mu, sigma) for any valid mean and median.
        """
        # Check input validity: median must be smaller than mean
        if median_ratio <= 0 or median_ratio >= 1:
            raise ValueError("median_ratio must be in range (0, 1)")

        target_median = target_average * median_ratio

        # Solve sigma^2 = 2 * ln(mean / median)
        sigma = np.sqrt(2 * np.log(target_average / target_median))
        mu = np.log(target_median)

        return mu, sigma

    def sample(self, size: int = 1) -> np.ndarray:
        samples = np.random.lognormal(mean=self.mean, sigma=self.sigma, size=size)

        if self.average is not None:
            # Scale to average
            samples *= self.average / samples.mean()

        if self.max_val:
            samples = np.minimum(samples, self.max_val)

        return np.round(samples).astype(int)

    def __repr__(self) -> str:
-        return f"LognormalDistribution[{self.mean}, {self.sigma}]"
+        if self.average:
+            return (
+                f"LognormalDistribution[{self.average}, "
+                f"{self.median_ratio}, {self.max_val}]"
+            )
+        return f"LognormalDistribution[{self.mean}, {self.sigma}, {self.max_val}]"


class GenConvArgs(NamedTuple):
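
For illustration only (not part of this commit's diff), a minimal sketch of how the updated class could be exercised, assuming `LognormalDistribution` as defined above is in scope; the numeric values are arbitrary examples:

```python
# Average-based parameterization: mean/sigma of the underlying normal
# distribution are derived internally from average and median_ratio.
dist_avg = LognormalDistribution(average=1000, median_ratio=0.85, max_val=5000)

# Direct parameterization of the underlying normal distribution.
dist_raw = LognormalDistribution(mean=6.5, sigma=0.6, max_val=5000)

# Both return integer samples, capped at max_val when it is set.
print(dist_avg.sample(size=5))
print(dist_raw.sample(size=5))
```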

@@ -173,10 +257,21 @@ def get_random_distribution(
        return PoissonDistribution(conf["alpha"], max_val=max_val)

    elif distribution == "lognormal":
        max_val = conf.get("max", None)

        if "average" in conf:
            # Infer lognormal mean/sigma (numpy) from input average
            median_ratio = conf.get("median_ratio", None)
            return LognormalDistribution(
                average=conf["average"], median_ratio=median_ratio, max_val=max_val
            )

        # Use mean/sigma directly (for full control over the distribution)
        verify_field_exists(conf, "mean", section, subsection)
        verify_field_exists(conf, "sigma", section, subsection)
-        max_val = conf.get("max", None)
-        return LognormalDistribution(conf["mean"], conf["sigma"], max_val=max_val)
+        return LognormalDistribution(
+            mean=conf["mean"], sigma=conf["sigma"], max_val=max_val
+        )

    elif distribution == "uniform":
        verify_field_exists(conf, "min", section, subsection)

@@ -15,9 +15,8 @@
        },
        "prefix_num_tokens": {
            "distribution": "lognormal",
-            "mean": 6,
-            "sigma": 4,
-            "max": 1500
+            "average": 1000,
+            "max": 5000
        },
        "num_tokens": {
            "distribution": "uniform",