CI Enable 5 test cases on XPU (#2442)

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

Author:  Yao Matrix <matrix.yao@intel.com>
Date:    2025-03-25 20:43:14 +08:00 (committed via GitHub)
Commit:  8d935a63c2 (parent: 15106f53b7)

3 changed files with 22 additions and 20 deletions

scripts/launch_notebook_mp.py

@@ -22,6 +22,7 @@ import torch
 from accelerate import notebook_launcher

 import peft
+from peft.utils import infer_device


 def init():
@@ -33,7 +34,8 @@ def init():
         def forward(self, x):
             return self.linear(x)

-    model = MyModule().to("cuda")
+    device = infer_device()
+    model = MyModule().to(device)
     peft.get_peft_model(model, peft.LoraConfig(target_modules=["linear"]))

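The script change swaps the hard-coded "cuda" for peft.utils.infer_device(), which returns the name of the best available accelerator backend so the notebook-launcher script also runs on XPU. A minimal sketch of the idea; the actual helper in peft.utils may probe more backends (e.g. MPS, NPU) and in a different order:

import torch

def infer_device() -> str:
    # Sketch of an accelerator-agnostic device probe; the real
    # peft.utils.infer_device may differ in backends and ordering.
    if torch.cuda.is_available():
        return "cuda"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    return "cpu"

With this, MyModule().to(infer_device()) lands on CUDA on NVIDIA runners, on XPU on Intel runners, and falls back to CPU elsewhere, with no change to the test body.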
tests/test_common_gpu.py

@@ -62,7 +62,6 @@ from .testing_utils import (
     require_bitsandbytes,
     require_multi_accelerator,
     require_non_cpu,
-    require_torch_gpu,
 )
@@ -1252,7 +1251,7 @@ class PeftGPUCommonTests(unittest.TestCase):
         assert torch.allclose(out_dora, out_unmerged, atol=atol, rtol=rtol)
         assert torch.allclose(out_dora, out_unloaded, atol=atol, rtol=rtol)

-    @require_torch_gpu
+    @require_non_cpu
     @pytest.mark.single_gpu_tests
     @require_bitsandbytes
     def test_8bit_dora_merging(self):

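The CUDA-only markers require_torch_gpu and require_torch_multi_gpu are replaced with the backend-agnostic require_non_cpu and require_multi_accelerator, imported from tests/testing_utils.py. A plausible sketch of such skip decorators, assuming an infer_device helper as above; the real definitions in testing_utils may differ:

import unittest

import torch

from peft.utils import infer_device

def require_non_cpu(test_case):
    # Skip unless some accelerator (CUDA, XPU, ...) is available.
    return unittest.skipUnless(infer_device() != "cpu", "test requires an accelerator")(test_case)

def require_multi_accelerator(test_case):
    # Skip unless at least two devices of the detected backend are visible.
    device = infer_device()
    backend = getattr(torch, device, None)
    n_devices = 0 if device == "cpu" or backend is None else backend.device_count()
    return unittest.skipUnless(n_devices > 1, "test requires multiple accelerators")(test_case)

Because the skip condition is evaluated per backend rather than hard-wired to torch.cuda, the same tests are collected on both CUDA and XPU CI runners.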
tests/test_gpu_examples.py

@@ -79,6 +79,7 @@ from peft.utils.loftq_utils import NFQuantizer
 from peft.utils.other import fsdp_auto_wrap_policy

 from .testing_utils import (
+    device_count,
     require_aqlm,
     require_auto_awq,
     require_auto_gptq,
@@ -302,7 +303,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -424,7 +425,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
         assert trainer.state.log_history[-1]["train_loss"] is not None

     @pytest.mark.single_gpu_tests
-    @require_torch_gpu
+    @require_non_cpu
     def test_8bit_adalora_causalLM(self):
         r"""
         Tests the 8bit training with adalora
@@ -497,7 +498,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
         assert trainer.state.log_history[-1]["train_loss"] is not None

     @pytest.mark.multi_gpu_tests
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_causal_lm_training_multi_gpu(self):
         r"""
         Test the CausalLM training on a multi-GPU device. This test is a converted version of
@@ -511,8 +512,8 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             quantization_config=BitsAndBytesConfig(load_in_8bit=True),
             device_map="auto",
         )
-        print(f"device map: {model.hf_device_map}")
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
         model = prepare_model_for_kbit_training(model)
@@ -621,7 +622,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
         assert trainer.state.log_history[-1]["train_loss"] is not None

     @pytest.mark.multi_gpu_tests
-    @require_torch_multi_gpu
+    @require_multi_accelerator
     def test_seq2seq_lm_training_multi_gpu(self):
         r"""
         Test the Seq2SeqLM training on a multi-GPU device. This test is a converted version of
@@ -636,7 +637,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             device_map="balanced",
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
         model = prepare_model_for_kbit_training(model)
@@ -920,7 +921,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -1037,7 +1038,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             quantization_config=BitsAndBytesConfig(load_in_8bit=True),
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -1284,7 +1285,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             quantization_config=BitsAndBytesConfig(load_in_8bit=True),
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -1343,7 +1344,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
             quantization_config=BitsAndBytesConfig(load_in_4bit=True),
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -1656,7 +1657,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
             quantization_config=self.quantization_config,
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -2552,7 +2553,7 @@ class TestLoftQ:
 @require_bitsandbytes
-@require_torch_gpu
+@require_non_cpu
 class MultiprocessTester(unittest.TestCase):
     def test_notebook_launcher(self):
         script_path = os.path.join("scripts", "launch_notebook_mp.py")
@@ -3187,7 +3188,7 @@ class PeftAwqGPUTests(unittest.TestCase):
             device_map="auto",
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -3335,7 +3336,7 @@ class PeftEetqGPUTests(unittest.TestCase):
             quantization_config=quantization_config,
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
@@ -3586,7 +3587,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
             torch_dtype=torch.bfloat16,
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
         model.model_parallel = True
@@ -3646,7 +3647,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
             torch_dtype=torch.bfloat16,
         )
-        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
+        assert set(model.hf_device_map.values()) == set(range(device_count))

         model = prepare_model_for_kbit_training(model)
         model.model_parallel = True
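Every hf_device_map assertion above drops torch.cuda.device_count() in favor of a device_count value imported from tests/testing_utils.py, so the same check passes on hosts whose accelerators are XPUs, where torch.cuda.device_count() would report 0. A sketch of how such a module-level constant can be derived, again assuming an infer_device helper; the actual definition in testing_utils may be computed differently:

import torch

from peft.utils import infer_device

# Number of visible devices for whichever backend was detected.
# Sketch only; the real tests/testing_utils.py may differ.
torch_device = infer_device()
if torch_device == "cuda":
    device_count = torch.cuda.device_count()
elif torch_device == "xpu":
    device_count = torch.xpu.device_count()
else:
    device_count = 0

With device_map="auto" (or "balanced"), accelerate spreads the model across all visible devices, so set(model.hf_device_map.values()) == set(range(device_count)) holds regardless of whether those devices are CUDA GPUs or XPUs.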