Mirror of https://github.com/huggingface/accelerate.git (synced 2025-10-20 18:13:46 +08:00)
Compare commits: v1.5.1...debug-test (12 commits)
Commits (SHA1):
7f48cb52e7
e33aba7371
068d586938
76043b402f
3126992054
656e15e4f8
1b21f9a630
f592aad8df
b69239577f
f973f0d5f9
56580b40c5
30ac26cf33

.github/workflows/nightly.yml (vendored, 324 changes)
@@ -44,186 +44,186 @@ jobs:
source activate accelerate
make test

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples
# - name: Run examples on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# pip uninstall comet_ml -y
# make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
# - name: Generate Report
# working-directory: accelerate
# if: always()
# run: |
# pip install slack_sdk tabulate
# python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_deepspeed_tests_single_gpu:
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci]
env:
CUDA_VISIBLE_DEVICES: "0"
TEST_TYPE: "single_gpu_deepspeed"
container:
image: huggingface/accelerate:gpu-deepspeed-nightly
options: --gpus all --shm-size "16gb"
defaults:
run:
shell: bash
steps:
- name: Update clone & pip install
run: |
source activate accelerate
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate
# run_deepspeed_tests_single_gpu:
# runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci]
# env:
# CUDA_VISIBLE_DEVICES: "0"
# TEST_TYPE: "single_gpu_deepspeed"
# container:
# image: huggingface/accelerate:gpu-deepspeed-nightly
# options: --gpus all --shm-size "16gb"
# defaults:
# run:
# shell: bash
# steps:
# - name: Update clone & pip install
# run: |
# source activate accelerate
# git clone https://github.com/huggingface/accelerate;
# cd accelerate;
# git checkout ${{ github.sha }};
# pip install -e . --no-deps
# pip install pytest-reportlog tabulate

- name: Show installed libraries
run: |
source activate accelerate;
pip freeze
# - name: Show installed libraries
# run: |
# source activate accelerate;
# pip freeze

- name: Run test on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test_deepspeed
# - name: Run test on GPUs
# working-directory: accelerate
# run: |
# source activate accelerate
# make test_deepspeed

- name: Run Integration tests on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
make test_integrations
# - name: Run Integration tests on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# make test_integrations

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples
# - name: Run examples on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# pip uninstall comet_ml -y
# make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
# - name: Generate Report
# working-directory: accelerate
# if: always()
# run: |
# pip install slack_sdk tabulate
# python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_core_tests_multi_gpu:
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
env:
CUDA_VISIBLE_DEVICES: "0,1"
TEST_TYPE: "multi_gpu"
container:
image: huggingface/accelerate:gpu-nightly
options: --gpus all --shm-size "16gb"
defaults:
run:
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate
# run_core_tests_multi_gpu:
# runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
# env:
# CUDA_VISIBLE_DEVICES: "0,1"
# TEST_TYPE: "multi_gpu"
# container:
# image: huggingface/accelerate:gpu-nightly
# options: --gpus all --shm-size "16gb"
# defaults:
# run:
# shell: bash
# steps:
# - name: Update clone
# run: |
# source activate accelerate
# git clone https://github.com/huggingface/accelerate;
# cd accelerate;
# git checkout ${{ github.sha }};
# pip install -e . --no-deps
# pip install pytest-reportlog tabulate

- name: Show installed libraries
run: |
source activate accelerate;
pip freeze
# - name: Show installed libraries
# run: |
# source activate accelerate;
# pip freeze

- name: Run core and big modeling tests on GPUs
working-directory: accelerate
run: |
source activate accelerate
make test_core
make test_big_modeling
make test_cli
# - name: Run core and big modeling tests on GPUs
# working-directory: accelerate
# run: |
# source activate accelerate
# make test_core
# make test_big_modeling
# make test_cli

- name: Run Integration tests on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
make test_integrations
# - name: Run Integration tests on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# make test_integrations

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples
# - name: Run examples on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# pip uninstall comet_ml -y
# make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
# - name: Generate Report
# working-directory: accelerate
# if: always()
# run: |
# pip install slack_sdk tabulate
# python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

run_deepspeed_tests_multi_gpu:
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
env:
CUDA_VISIBLE_DEVICES: "0,1"
TEST_TYPE: "multi_gpu_deepspeed"
container:
image: huggingface/accelerate:gpu-deepspeed-nightly
options: --gpus all --shm-size "16gb"
defaults:
run:
shell: bash
steps:
- name: Update clone
run: |
source activate accelerate
git clone https://github.com/huggingface/accelerate;
cd accelerate;
git checkout ${{ github.sha }};
pip install -e . --no-deps
pip install pytest-reportlog tabulate
# run_deepspeed_tests_multi_gpu:
# runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
# env:
# CUDA_VISIBLE_DEVICES: "0,1"
# TEST_TYPE: "multi_gpu_deepspeed"
# container:
# image: huggingface/accelerate:gpu-deepspeed-nightly
# options: --gpus all --shm-size "16gb"
# defaults:
# run:
# shell: bash
# steps:
# - name: Update clone
# run: |
# source activate accelerate
# git clone https://github.com/huggingface/accelerate;
# cd accelerate;
# git checkout ${{ github.sha }};
# pip install -e . --no-deps
# pip install pytest-reportlog tabulate

- name: Show installed libraries
run: |
source activate accelerate;
pip freeze
# - name: Show installed libraries
# run: |
# source activate accelerate;
# pip freeze

- name: Run DeepSpeed tests
working-directory: accelerate
run: |
source activate accelerate
make test_deepspeed
# - name: Run DeepSpeed tests
# working-directory: accelerate
# run: |
# source activate accelerate
# make test_deepspeed

- name: Run Integration tests on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
make test_integrations
# - name: Run Integration tests on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# make test_integrations

- name: Run examples on GPUs
working-directory: accelerate
if: always()
run: |
source activate accelerate
pip uninstall comet_ml -y
make test_examples
# - name: Run examples on GPUs
# working-directory: accelerate
# if: always()
# run: |
# source activate accelerate
# pip uninstall comet_ml -y
# make test_examples

- name: Generate Report
working-directory: accelerate
if: always()
run: |
pip install slack_sdk tabulate
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
# - name: Generate Report
# working-directory: accelerate
# if: always()
# run: |
# pip install slack_sdk tabulate
# python utils/log_reports.py >> $GITHUB_STEP_SUMMARY


run-integration-tests:
if: always()
uses: ./.github/workflows/self_hosted_integration_tests.yml
# run-integration-tests:
# if: always()
# uses: ./.github/workflows/self_hosted_integration_tests.yml

Makefile (4 changes)
@@ -42,11 +42,7 @@ test_fsdp:
# Since the new version of pytest will *change* how things are collected, we need `deepspeed` to
# run after test_core and test_cli
test:
$(MAKE) test_core
$(MAKE) test_cli
$(MAKE) test_big_modeling
$(MAKE) test_deepspeed
$(MAKE) test_fsdp

test_examples:
python -m pytest -s -v ./tests/test_examples.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_examples.log",)

@@ -397,6 +397,7 @@ def dispatch_model(
weights_map = OffloadedWeightsLoader(
state_dict=state_dict, save_folder=save_folder, index=offload_index, device=device
)
print(weights_map)
else:
weights_map = None

@@ -415,7 +416,6 @@ def dispatch_model(

# Note: To handle the disk offloading case, we can not simply use weights_map[param_name].data_ptr() as the reference pointer,
# as we have no guarantee that safetensors' `file.get_tensor()` will always give the same pointer.

attach_align_device_hook_on_blocks(
model,
execution_device=execution_device,
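
The note in the hunk above is the motivation for keying tied parameters by a stable reference pointer rather than by whatever safetensors returns on each read. A minimal sketch of the behaviour it guards against, assuming a throwaway file name and tensor key chosen only for this illustration:

    import torch
    from safetensors.torch import save_file
    from safetensors import safe_open

    # Hypothetical file, created only for the demonstration.
    save_file({"w": torch.randn(4, 4)}, "demo.safetensors")

    with safe_open("demo.safetensors", framework="pt", device="cpu") as f:
        a = f.get_tensor("w")  # first read of the key
        b = f.get_tensor("w")  # second read of the same key

    # The values match, but safetensors gives no guarantee that both reads are
    # backed by the same storage, so data_ptr() is not a reliable identity key.
    print(torch.equal(a, b))              # True
    print(a.data_ptr() == b.data_ptr())   # may be True or False
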
@@ -340,6 +340,11 @@ def set_module_tensor_to_device(
and value.data_ptr() in tied_params_map
and device in tied_params_map[value.data_ptr()]
):
print("using value from tied_params_map value")
print(tensor_name)
print(value)
print(value.data_ptr())
print(tied_params_map[value.data_ptr()][device])
module._parameters[tensor_name] = tied_params_map[value.data_ptr()][device]
return
elif (

@@ -347,6 +352,11 @@ def set_module_tensor_to_device(
and old_value.data_ptr() in tied_params_map
and device in tied_params_map[old_value.data_ptr()]
):
print("using value from tied_params_map old_value")
print(tensor_name)
print(value)
print(old_value.data_ptr())
print(tied_params_map[old_value.data_ptr()][device])
module._parameters[tensor_name] = tied_params_map[old_value.data_ptr()][device]
return

@@ -466,6 +476,8 @@ def set_module_tensor_to_device(
and device not in tied_params_map[old_value.data_ptr()]
):
tied_params_map[old_value.data_ptr()][device] = new_value
print("tied_map updated 1 ")
print(tied_params_map)
elif (
value is not None
and tied_params_map is not None

@@ -473,6 +485,8 @@ def set_module_tensor_to_device(
and device not in tied_params_map[value.data_ptr()]
):
tied_params_map[value.data_ptr()][device] = new_value
print("tied_map updated 2")
print(tied_params_map)


def named_module_tensors(
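
For readers following the prints added above: tied_params_map is effectively a nested mapping from the data pointer of a reference tensor to the copy of that tied weight already placed on each device. A minimal sketch of that shape, with illustrative names and a CPU device standing in for "cuda:0" (this is not the library's actual bookkeeping code):

    import torch

    # data_ptr of the reference tensor -> {device: tensor already placed there}
    tied_params_map: dict[int, dict[torch.device, torch.Tensor]] = {}

    weight = torch.randn(3, 3)               # stand-in for a tied parameter
    tied_params_map[weight.data_ptr()] = {}  # register the reference pointer

    device = torch.device("cpu")
    if device not in tied_params_map[weight.data_ptr()]:
        # First placement on this device: remember the moved copy...
        tied_params_map[weight.data_ptr()][device] = weight.to(device)

    # ...so later calls reuse it instead of materializing another copy.
    reused = tied_params_map[weight.data_ptr()][device]
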
@@ -145,6 +145,50 @@ class ModelWithUnusedSubModulesForTest(nn.Module):
return self.linear4(self.linear3(self.batchnorm(self.linear2(self.linear1(x)))))


# To test dispatch with tied weights
class SubModule(torch.nn.Module):
def __init__(self, ref_to_parameter):
super().__init__()
self.parameter = ref_to_parameter

def forward(self, x):
return x + torch.max(self.parameter)


class LinearModuleAndSubModule(torch.nn.Linear):
def __init__(self, in_features, out_features, name):
super().__init__(in_features, out_features, bias=False)
print("init weights")
self.name = name
self.weight_submodule = SubModule(self.weight)
self.weight_submodule2 = SubModule(self.weight)
self.weight_submodule3 = SubModule(self.weight)
self.weight_submodule4 = SubModule(self.weight)

def forward(self, x):
print("weight")
print(self.weight)
print("name")
print(self.name)
a = torch.nn.functional.linear(self.weight_submodule(x), self.weight)
b = torch.nn.functional.linear(self.weight_submodule2(x), self.weight)
c = torch.nn.functional.linear(self.weight_submodule3(x), self.weight)
d = torch.nn.functional.linear(self.weight_submodule4(x), self.weight)
return a + b + c + d


class ModelWithSubmodules(torch.nn.Module):
def __init__(self):
super().__init__()
self.module1 = LinearModuleAndSubModule(5000, 5000, "1")
self.module2 = LinearModuleAndSubModule(5000, 5000, "2")

def forward(self, x):
a = self.module1(x)
b = self.module2(x)
return a + b


class BigModelingTester(unittest.TestCase):
def test_init_empty_weights(self):
# base use
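
The classes added above exist to exercise dispatch with tied weights: every SubModule keeps a reference to the same underlying nn.Linear weight, so the four submodules and the linear layer share one parameter. A quick way to see the tie, assuming the LinearModuleAndSubModule defined in the hunk above and a small size instead of 5000 to keep it cheap:

    layer = LinearModuleAndSubModule(8, 8, "demo")
    print(layer.weight_submodule.parameter is layer.weight)                         # True: same tensor object
    print(layer.weight_submodule4.parameter.data_ptr() == layer.weight.data_ptr())  # True: same storage
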
@@ -490,42 +534,8 @@ class BigModelingTester(unittest.TestCase):

torch.cuda.empty_cache() # Needed in case we run several tests in a row.

class SubModule(torch.nn.Module):
def __init__(self, ref_to_parameter):
super().__init__()
self.parameter = ref_to_parameter

def forward(self, x):
return x + torch.max(self.parameter)

class LinearModuleAndSubModule(torch.nn.Linear):
def __init__(self, in_features, out_features):
super().__init__(in_features, out_features, bias=False)
self.weight_submodule = SubModule(self.weight)
self.weight_submodule2 = SubModule(self.weight)
self.weight_submodule3 = SubModule(self.weight)
self.weight_submodule4 = SubModule(self.weight)

def forward(self, x):
a = torch.nn.functional.linear(self.weight_submodule(x), self.weight)
b = torch.nn.functional.linear(self.weight_submodule2(x), self.weight)
c = torch.nn.functional.linear(self.weight_submodule3(x), self.weight)
d = torch.nn.functional.linear(self.weight_submodule4(x), self.weight)
return a + b + c + d

class ModelWithSubmodules(torch.nn.Module):
def __init__(self):
super().__init__()
self.compute = LinearModuleAndSubModule(5000, 5000)
self.compute1 = LinearModuleAndSubModule(5000, 5000)

def forward(self, x):
a = self.compute(x)
b = self.compute1(x)
return a + b

# We should need only 2 * 5000 * 5000 * 32 // 8 * 1e-6 = 200 MB on the device 0 for the whole model forward, and not 600 MB.
device_map = {"compute": 0, "compute1": "disk"}
device_map = {"module1": 0, "module2": "disk"}

model = ModelWithSubmodules()
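
The 200 MB figure in the comment above is just the literal expression evaluated: two 5000 x 5000 fp32 weight matrices, with the tied submodules adding no extra storage. A quick check of the arithmetic in plain Python, independent of the test itself:

    params_per_layer = 5000 * 5000        # one weight matrix per layer, bias=False
    bytes_per_param = 32 // 8             # fp32 is 4 bytes
    print(params_per_layer * bytes_per_param * 1e-6)      # 100.0 MB per layer
    print(2 * params_per_layer * bytes_per_param * 1e-6)  # 200.0 MB for both modules
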
@@ -545,7 +555,13 @@ class BigModelingTester(unittest.TestCase):

free_memory_bytes_before_dispatch = torch.cuda.mem_get_info("cuda:0")[0]
with TemporaryDirectory() as tmp_dir:
print("before dispatch")
print(model.module1.weight)
print(model.module2.weight)
dispatch_model(model, device_map, offload_dir=tmp_dir)
print("after dispatch")
print(model.module1.weight)
print(model.module2.weight)
free_memory_bytes_after_dispatch = torch.cuda.mem_get_info("cuda:0")[0]

assert (free_memory_bytes_after_dispatch - free_memory_bytes_before_dispatch) * 1e-6 < 130

@@ -559,7 +575,6 @@ class BigModelingTester(unittest.TestCase):
)
except Exception as e:
raise e

assert torch.allclose(expected, output.cpu(), atol=1e-5)

torch.cuda.empty_cache()

@@ -568,16 +583,16 @@ class BigModelingTester(unittest.TestCase):

# Check that we have no more references on GPU for the offloaded tied weight.
n_non_empty = 0
for pointer, pointer_dict in model.compute1.weight_submodule._hf_hook.tied_params_map.items():
for pointer, pointer_dict in model.module1.weight_submodule._hf_hook.tied_params_map.items():
if len(pointer_dict) > 0:
n_non_empty += 1
assert n_non_empty == 1 # `compute` layer one.
assert n_non_empty == 1 # `module1` layer one.

n_non_empty = 0
for pointer, pointer_dict in model.compute1._hf_hook.tied_params_map.items():
for pointer, pointer_dict in model.module1._hf_hook.tied_params_map.items():
if len(pointer_dict) > 0:
n_non_empty += 1
assert n_non_empty == 1 # `compute` layer one.
assert n_non_empty == 1 # `module1` layer one.

assert (free_memory_bytes_after_infer - free_memory_bytes_after_dispatch) * 1e-6 < 130