# Source: vllm/tests/v1/worker/test_gpu_block_table.py
# Commit 7097f31955 ("test") by Woosuk Kwon, 2025-01-15 03:22:32 -08:00
# Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
# File stats at that commit: 53 lines, 1.5 KiB, Python

import pytest
import random
import time
import torch
from vllm.v1.worker.gpu_block_table import BlockTable
# Sizing constants shared by the BlockTable test below.
# BLOCK_SIZE divides MAX_MODEL_LEN to give the per-request block capacity
# (presumably tokens per block and tokens per sequence -- confirm against
# the BlockTable implementation).
BLOCK_SIZE: int = 16
MAX_MODEL_LEN: int = 128 * 1024
MAX_NUM_REQS: int = 1024

# Upper bound on how many block IDs a single request row can hold.
MAX_NUM_BLOCKS_PER_REQ: int = MAX_MODEL_LEN // BLOCK_SIZE
@pytest.mark.parametrize("do_wait", [False, True])
def test_block_table(do_wait: bool) -> None:
    """Check that the device block table matches its CPU copy after edits.

    The test builds a ``BlockTable`` on CUDA device 0 with pinned memory,
    fills rows 0 and 1 with random block IDs, commits, then appends 100 more
    IDs to row 1, calls ``move_row(1, 0)``, commits again, and finally
    compares row 0 of ``block_table.block_table`` (copied to the host) with
    row 0 of ``block_table.block_table_cpu``.

    Requires a CUDA device: it calls ``torch.cuda.synchronize()`` and places
    the table on ``torch.device(0)``.

    Args:
        do_wait: When true, sleep for one second after each
            ``torch.cuda.synchronize()``. Presumably this varies the timing
            between commits and later edits -- TODO confirm the intent.
            Supplied by ``pytest.mark.parametrize`` so the test is
            collectable; without it pytest would look for a fixture named
            ``do_wait`` and error out.
    """
    # Seed every RNG in use so the generated block IDs are reproducible.
    random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed_all(0)

    block_table = BlockTable(
        max_num_reqs=MAX_NUM_REQS,
        max_model_len=MAX_MODEL_LEN,
        max_num_blocks_per_req=MAX_NUM_BLOCKS_PER_REQ,
        pin_memory=True,
        device=torch.device(0),
    )

    # Row 0: a random number of random block IDs.
    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 1)
    block_ids = torch.randint(
        0,
        MAX_NUM_BLOCKS_PER_REQ,
        (num_blocks,),
        dtype=torch.int32,
        device="cpu",
    )
    block_table.add_row(0, block_ids)

    # Row 1: leave room for the 100 IDs appended further down, so that
    # num_blocks + 100 never exceeds MAX_NUM_BLOCKS_PER_REQ.
    num_blocks = random.randint(1, MAX_NUM_BLOCKS_PER_REQ - 100)
    block_ids = torch.randint(
        0,
        MAX_NUM_BLOCKS_PER_REQ,
        (num_blocks,),
        dtype=torch.int32,
        device="cpu",
    )
    block_table.add_row(1, block_ids)

    block_table.commit(2)
    torch.cuda.synchronize()
    if do_wait:
        time.sleep(1)

    # Extend row 1 starting at offset ``num_blocks`` (its current length),
    # then move it onto row 0 (presumably (src, dst) order -- TODO confirm).
    block_ids = torch.randint(
        0,
        MAX_NUM_BLOCKS_PER_REQ,
        (100,),
        dtype=torch.int32,
        device="cpu",
    )
    block_table.append_row(1, num_blocks, block_ids)
    block_table.move_row(1, 0)

    block_table.commit(2)
    torch.cuda.synchronize()
    if do_wait:
        time.sleep(1)

    # NOTE(review): only the first row is compared even though two rows are
    # committed; widening to [:2] may be intended -- confirm against
    # BlockTable.move_row / commit semantics before changing.
    torch.testing.assert_close(
        block_table.block_table[:1].cpu(),
        block_table.block_table_cpu[:1],
    )
# Script entry point: run the check once, without the optional sleeps,
# when this file is executed directly rather than collected by pytest.
if __name__ == "__main__":
    test_block_table(do_wait=False)