# Mirror of https://github.com/vllm-project/vllm.git
# Synced: 2025-10-20 14:53:52 +08:00
# Standard library
import random
import time

# Third-party
import pytest
import torch

# Project-local
from vllm.v1.worker.gpu_block_table import BlockTable
# Maximum number of concurrent requests the block table supports.
MAX_NUM_REQS = 1024
# Maximum model context length in tokens.
MAX_MODEL_LEN = 128 * 1024
# Number of tokens per KV-cache block.
BLOCK_SIZE = 16
# Upper bound on the number of blocks a single request can occupy.
MAX_NUM_BLOCKS_PER_REQ = MAX_MODEL_LEN // BLOCK_SIZE
@pytest.mark.parametrize("do_wait", [True, False])
def test_block_table(do_wait: bool):
    """Smoke test for BlockTable's CPU-to-GPU synchronization.

    Populates two rows, commits, then appends to and moves a row and
    commits again, finally checking that row 0 of the GPU block table
    matches the pinned CPU mirror.

    Args:
        do_wait: When True, sleep for 1s after each ``cuda.synchronize``
            to widen any window for async host-to-device copy races.
    """
    # Fix all RNGs so the test is reproducible.
    random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)

    block_table = BlockTable(
        max_num_reqs=MAX_NUM_REQS,
        max_model_len=MAX_MODEL_LEN,
        max_num_blocks_per_req=MAX_NUM_BLOCKS_PER_REQ,
        pin_memory=True,
        device=torch.device(0),
    )

    # Row 0: random length up to the per-request maximum.
    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 1)
    block_ids = torch.randint(
        0, MAX_NUM_BLOCKS_PER_REQ, (num_blocks,), dtype=torch.int32, device="cpu")
    block_table.add_row(0, block_ids)
    # Row 1: leave at least 100 blocks of headroom so the append below
    # cannot overflow the row.
    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 100)
    block_ids = torch.randint(
        0, MAX_NUM_BLOCKS_PER_REQ, (num_blocks,), dtype=torch.int32, device="cpu")
    block_table.add_row(1, block_ids)
    block_table.commit(2)

    torch.cuda.synchronize()
    if do_wait:
        time.sleep(1)

    # Append 100 more blocks to row 1, then overwrite row 0 with it.
    block_ids = torch.randint(
        0, MAX_NUM_BLOCKS_PER_REQ, (100,), dtype=torch.int32, device="cpu")
    block_table.append_row(1, num_blocks, block_ids)
    block_table.move_row(1, 0)
    block_table.commit(2)

    torch.cuda.synchronize()
    if do_wait:
        time.sleep(1)

    # After the commits, row 0 on the GPU must equal the CPU mirror.
    torch.testing.assert_close(
        block_table.block_table[:1].cpu(), block_table.block_table_cpu[:1])
if __name__ == "__main__":
    # Direct invocation: run a single pass without the extra sleeps.
    test_block_table(do_wait=False)