DeepSpeed/tests/unit/runtime/test_multi_output_model.py

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import torch
import deepspeed
from deepspeed.accelerator import get_accelerator
from pytest import approx
from unit.common import DistributedTest, preferred_dtype
from unit.multi_output_model import MultiOutputModel, multi_output_dataloader
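
# Both tests below exercise MultiOutputModel, which returns one scalar loss
# per (input, target) pair: each loss is checked against a fixed expected
# value, the summed loss is backpropagated, and the engine's
# gradient-accumulation scaling (summed_loss / gradient_accumulation_steps)
# is verified before the optimizer step.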


class TestTwoOutputModel(DistributedTest):
    world_size = 1

    def test(self, tmpdir):
        grad_accumulation_steps = 2
        micro_batch_size = 1
        world_size = self.world_size
        config_dict = {
            "train_micro_batch_size_per_gpu": micro_batch_size,
            "gradient_accumulation_steps": grad_accumulation_steps,
            "train_batch_size": micro_batch_size * grad_accumulation_steps * world_size,
            "steps_per_print": 1,
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 0.00015
                }
            },
        }
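        # Prefer bf16 when the accelerator supports it; otherwise fall back to fp16.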
        if get_accelerator().is_bf16_supported():
            config_dict["bf16"] = {"enabled": True}
        elif get_accelerator().is_fp16_supported():
            config_dict["fp16"] = {"enabled": True}
        hidden_dim = 10
        weight_value = 0.1

        model = MultiOutputModel(hidden_dim, weight_value)
        model, _, _, _ = deepspeed.initialize(config=config_dict, model=model, model_parameters=model.parameters())
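        # The loader yields flat batches packed back-to-back as [inputs..., targets...].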
        total_samples = 4
        data_loader = multi_output_dataloader(model=model,
                                              total_samples=total_samples,
                                              hidden_dim=hidden_dim,
                                              device=model.device,
                                              inputs=[1.0, 2.0],
                                              targets=[1, 2])
        for n, batch in enumerate(data_loader):
            assert len(batch) % 2 == 0, \
                "multi_output_dataloader failed to return even number of data samples (input+target)"

            midpoint = len(batch) // 2
            inputs, targets = batch[:midpoint], batch[midpoint:]
            loss_tuple = model(inputs, targets)
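
            # With fixed weights and inputs, each per-output loss is deterministic;
            # 2.302734375 is ln(10) rounded to fp16 precision, consistent with a
            # near-uniform softmax over hidden_dim=10 classes.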
            expected_loss = torch.tensor(2.302734375, dtype=preferred_dtype(), device=model.device)
            for loss in loss_tuple:
                assert loss.shape == torch.Size([])
                assert loss.item() == approx(expected_loss.item())
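
            # engine.backward() returns the loss scaled by 1/gradient_accumulation_steps.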
            summed_loss = sum(loss_tuple)
            scaled_loss = model.backward(summed_loss)
            expected_scaled_loss = summed_loss.float() / grad_accumulation_steps
            assert scaled_loss.item() == approx(expected_scaled_loss.item())

            model.step()


class TestThreeOutputModel(DistributedTest):
    world_size = 1

    def test(self, tmpdir):
        grad_accumulation_steps = 3
        micro_batch_size = 1
        world_size = self.world_size
        config_dict = {
            "train_micro_batch_size_per_gpu": micro_batch_size,
            "gradient_accumulation_steps": grad_accumulation_steps,
            "train_batch_size": micro_batch_size * grad_accumulation_steps * world_size,
            "steps_per_print": 1,
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 0.00015
                }
            },
        }
        if get_accelerator().is_bf16_supported():
            config_dict["bf16"] = {"enabled": True}
        elif get_accelerator().is_fp16_supported():
            config_dict["fp16"] = {"enabled": True}
        hidden_dim = 10
        weight_value = 0.1

        model = MultiOutputModel(hidden_dim, weight_value)
        model, _, _, _ = deepspeed.initialize(config=config_dict, model=model, model_parameters=model.parameters())
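        # Six micro-batches: two full effective batches of three accumulation
        # steps each, i.e. two optimizer steps.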
        total_samples = grad_accumulation_steps * micro_batch_size * 2
        data_loader = multi_output_dataloader(model=model,
                                              total_samples=total_samples,
                                              hidden_dim=hidden_dim,
                                              device=model.device,
                                              inputs=[1.0, 2.0, 3.0],
                                              targets=[1, 2, 3])
        for n, batch in enumerate(data_loader):
            assert len(batch) % 2 == 0, \
                "multi_output_dataloader failed to return even number of data samples (input+target)"

            midpoint = len(batch) // 2
            inputs, targets = batch[:midpoint], batch[midpoint:]
            loss_tuple = model(inputs, targets)
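
            # Three (input, target) pairs must yield exactly three scalar losses.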
            assert len(loss_tuple) == 3

            expected_loss = torch.tensor(2.302734375, dtype=preferred_dtype(), device=model.device)
            for loss in loss_tuple:
                assert loss.shape == torch.Size([])
                assert loss.item() == approx(expected_loss.item())

            summed_loss = sum(loss_tuple)
            scaled_loss = model.backward(summed_loss)
            expected_scaled_loss = summed_loss.float() / grad_accumulation_steps
            assert scaled_loss.item() == approx(expected_scaled_loss.item())

            model.step()