bug

testing
tied_param
2025-10-20 18:13:46 +08:00 · 2024-05-15 16:46:44 +02:00 · 2024-05-15 16:45:38 +02:00 · 2024-05-15 16:30:30 +02:00 · 2024-05-15 16:17:17 +02:00 · 2024-05-15 15:55:46 +02:00
5 changed files with 232 additions and 207 deletions
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -44,186 +44,186 @@ jobs:
          source activate accelerate
          make test
          
-      - name: Run examples on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          pip uninstall comet_ml -y
-          make test_examples
+      # - name: Run examples on GPUs
+      #   working-directory: accelerate
+      #   if: always()
+      #   run: |
+      #     source activate accelerate
+      #     pip uninstall comet_ml -y
+      #     make test_examples
          
-      - name: Generate Report
-        working-directory: accelerate
-        if: always()
-        run: |
-          pip install slack_sdk tabulate
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+      # - name: Generate Report
+      #   working-directory: accelerate
+      #   if: always()
+      #   run: |
+      #     pip install slack_sdk tabulate
+      #     python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

-  run_deepspeed_tests_single_gpu:
-    runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci]
-    env:
-      CUDA_VISIBLE_DEVICES: "0"
-      TEST_TYPE: "single_gpu_deepspeed"
-    container:
-      image: huggingface/accelerate:gpu-deepspeed-nightly
-      options: --gpus all --shm-size "16gb"
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Update clone & pip install
-        run: |
-          source activate accelerate
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
-          pip install -e . --no-deps
-          pip install pytest-reportlog tabulate
+  # run_deepspeed_tests_single_gpu:
+  #   runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci]
+  #   env:
+  #     CUDA_VISIBLE_DEVICES: "0"
+  #     TEST_TYPE: "single_gpu_deepspeed"
+  #   container:
+  #     image: huggingface/accelerate:gpu-deepspeed-nightly
+  #     options: --gpus all --shm-size "16gb"
+  #   defaults:
+  #     run:
+  #       shell: bash
+  #   steps:
+  #     - name: Update clone & pip install
+  #       run: |
+  #         source activate accelerate
+  #         git clone https://github.com/huggingface/accelerate;
+  #         cd accelerate;
+  #         git checkout ${{ github.sha }};
+  #         pip install -e . --no-deps
+  #         pip install pytest-reportlog tabulate

-      - name: Show installed libraries
-        run: |
-          source activate accelerate;
-          pip freeze
+  #     - name: Show installed libraries
+  #       run: |
+  #         source activate accelerate;
+  #         pip freeze

-      - name: Run test on GPUs
-        working-directory: accelerate
-        run: |
-          source activate accelerate
-          make test_deepspeed
+  #     - name: Run test on GPUs
+  #       working-directory: accelerate
+  #       run: |
+  #         source activate accelerate
+  #         make test_deepspeed

-      - name: Run Integration tests on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          make test_integrations
+  #     - name: Run Integration tests on GPUs
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         source activate accelerate
+  #         make test_integrations

-      - name: Run examples on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          pip uninstall comet_ml -y
-          make test_examples
+  #     - name: Run examples on GPUs
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         source activate accelerate
+  #         pip uninstall comet_ml -y
+  #         make test_examples
          
-      - name: Generate Report
-        working-directory: accelerate
-        if: always()
-        run: |
-          pip install slack_sdk tabulate
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+  #     - name: Generate Report
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         pip install slack_sdk tabulate
+  #         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

-  run_core_tests_multi_gpu:
-    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
-    env:
-      CUDA_VISIBLE_DEVICES: "0,1"
-      TEST_TYPE: "multi_gpu"
-    container:
-      image: huggingface/accelerate:gpu-nightly
-      options: --gpus all --shm-size "16gb"
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Update clone
-        run: |
-          source activate accelerate
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
-          pip install -e . --no-deps
-          pip install pytest-reportlog tabulate
+  # run_core_tests_multi_gpu:
+  #   runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
+  #   env:
+  #     CUDA_VISIBLE_DEVICES: "0,1"
+  #     TEST_TYPE: "multi_gpu"
+  #   container:
+  #     image: huggingface/accelerate:gpu-nightly
+  #     options: --gpus all --shm-size "16gb"
+  #   defaults:
+  #     run:
+  #       shell: bash
+  #   steps:
+  #     - name: Update clone
+  #       run: |
+  #         source activate accelerate
+  #         git clone https://github.com/huggingface/accelerate;
+  #         cd accelerate;
+  #         git checkout ${{ github.sha }};
+  #         pip install -e . --no-deps
+  #         pip install pytest-reportlog tabulate

-      - name: Show installed libraries
-        run: |
-          source activate accelerate;
-          pip freeze
+  #     - name: Show installed libraries
+  #       run: |
+  #         source activate accelerate;
+  #         pip freeze

-      - name: Run core and big modeling tests on GPUs
-        working-directory: accelerate
-        run: |
-          source activate accelerate
-          make test_core
-          make test_big_modeling
-          make test_cli
+  #     - name: Run core and big modeling tests on GPUs
+  #       working-directory: accelerate
+  #       run: |
+  #         source activate accelerate
+  #         make test_core
+  #         make test_big_modeling
+  #         make test_cli

-      - name: Run Integration tests on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          make test_integrations
+  #     - name: Run Integration tests on GPUs
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         source activate accelerate
+  #         make test_integrations

-      - name: Run examples on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          pip uninstall comet_ml -y
-          make test_examples
+  #     - name: Run examples on GPUs
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         source activate accelerate
+  #         pip uninstall comet_ml -y
+  #         make test_examples

-      - name: Generate Report
-        working-directory: accelerate
-        if: always()
-        run: |
-          pip install slack_sdk tabulate
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+  #     - name: Generate Report
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         pip install slack_sdk tabulate
+  #         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

-  run_deepspeed_tests_multi_gpu:
-    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
-    env:
-      CUDA_VISIBLE_DEVICES: "0,1"
-      TEST_TYPE: "multi_gpu_deepspeed"
-    container:
-      image: huggingface/accelerate:gpu-deepspeed-nightly
-      options: --gpus all --shm-size "16gb"
-    defaults:
-      run:
-        shell: bash
-    steps:
-      - name: Update clone
-        run: |
-          source activate accelerate
-          git clone https://github.com/huggingface/accelerate;
-          cd accelerate;
-          git checkout ${{ github.sha }};
-          pip install -e . --no-deps
-          pip install pytest-reportlog tabulate
+  # run_deepspeed_tests_multi_gpu:
+  #   runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
+  #   env:
+  #     CUDA_VISIBLE_DEVICES: "0,1"
+  #     TEST_TYPE: "multi_gpu_deepspeed"
+  #   container:
+  #     image: huggingface/accelerate:gpu-deepspeed-nightly
+  #     options: --gpus all --shm-size "16gb"
+  #   defaults:
+  #     run:
+  #       shell: bash
+  #   steps:
+  #     - name: Update clone
+  #       run: |
+  #         source activate accelerate
+  #         git clone https://github.com/huggingface/accelerate;
+  #         cd accelerate;
+  #         git checkout ${{ github.sha }};
+  #         pip install -e . --no-deps
+  #         pip install pytest-reportlog tabulate

-      - name: Show installed libraries
-        run: |
-          source activate accelerate;
-          pip freeze
+  #     - name: Show installed libraries
+  #       run: |
+  #         source activate accelerate;
+  #         pip freeze

-      - name: Run DeepSpeed tests
-        working-directory: accelerate
-        run: |
-          source activate accelerate
-          make test_deepspeed
+  #     - name: Run DeepSpeed tests
+  #       working-directory: accelerate
+  #       run: |
+  #         source activate accelerate
+  #         make test_deepspeed

-      - name: Run Integration tests on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          make test_integrations
+  #     - name: Run Integration tests on GPUs
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         source activate accelerate
+  #         make test_integrations

-      - name: Run examples on GPUs
-        working-directory: accelerate
-        if: always()
-        run: |
-          source activate accelerate
-          pip uninstall comet_ml -y
-          make test_examples
+  #     - name: Run examples on GPUs
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         source activate accelerate
+  #         pip uninstall comet_ml -y
+  #         make test_examples

-      - name: Generate Report
-        working-directory: accelerate
-        if: always()
-        run: |
-          pip install slack_sdk tabulate
-          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+  #     - name: Generate Report
+  #       working-directory: accelerate
+  #       if: always()
+  #       run: |
+  #         pip install slack_sdk tabulate
+  #         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY

  
-  run-integration-tests:
-    if: always()
-    uses: ./.github/workflows/self_hosted_integration_tests.yml
+  # run-integration-tests:
+  #   if: always()
+  #   uses: ./.github/workflows/self_hosted_integration_tests.yml
--- a/4
+++ b/4
@ -42,11 +42,7 @@ test_fsdp:
 # Since the new version of pytest will *change* how things are collected, we need `deepspeed` to 
 # run after test_core and test_cli
 test:
-	$(MAKE) test_core
-	$(MAKE) test_cli
 	$(MAKE) test_big_modeling
-	$(MAKE) test_deepspeed
-	$(MAKE) test_fsdp

 test_examples:
 	python -m pytest -s -v ./tests/test_examples.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_examples.log",)
--- a/src/accelerate/big_modeling.py
+++ b/src/accelerate/big_modeling.py
@ -397,6 +397,7 @@ def dispatch_model(
            weights_map = OffloadedWeightsLoader(
                state_dict=state_dict, save_folder=save_folder, index=offload_index, device=device
            )
+            print(weights_map)
        else:
            weights_map = None

@ -415,7 +416,6 @@ def dispatch_model(

                # Note: To handle the disk offloading case, we can not simply use weights_map[param_name].data_ptr() as the reference pointer,
                # as we have no guarantee that safetensors' `file.get_tensor()` will always give the same pointer.
-
        attach_align_device_hook_on_blocks(
            model,
            execution_device=execution_device,
--- a/src/accelerate/utils/modeling.py
+++ b/src/accelerate/utils/modeling.py
@ -340,6 +340,11 @@ def set_module_tensor_to_device(
        and value.data_ptr() in tied_params_map
        and device in tied_params_map[value.data_ptr()]
    ):
+        print("using value from tied_params_map value")
+        print(tensor_name)
+        print(value)
+        print(value.data_ptr())
+        print(tied_params_map[value.data_ptr()][device])
        module._parameters[tensor_name] = tied_params_map[value.data_ptr()][device]
        return
    elif (
@ -347,6 +352,11 @@ def set_module_tensor_to_device(
        and old_value.data_ptr() in tied_params_map
        and device in tied_params_map[old_value.data_ptr()]
    ):
+        print("using value from tied_params_map old_value")
+        print(tensor_name)
+        print(value)
+        print(old_value.data_ptr())
+        print(tied_params_map[old_value.data_ptr()][device])
        module._parameters[tensor_name] = tied_params_map[old_value.data_ptr()][device]
        return

@ -466,6 +476,8 @@ def set_module_tensor_to_device(
        and device not in tied_params_map[old_value.data_ptr()]
    ):
        tied_params_map[old_value.data_ptr()][device] = new_value
+        print("tied_map updated 1 ")
+        print(tied_params_map)
    elif (
        value is not None
        and tied_params_map is not None
@ -473,6 +485,8 @@ def set_module_tensor_to_device(
        and device not in tied_params_map[value.data_ptr()]
    ):
        tied_params_map[value.data_ptr()][device] = new_value
+        print("tied_map updated 2")
+        print(tied_params_map)


 def named_module_tensors(
--- a/tests/test_big_modeling.py
+++ b/tests/test_big_modeling.py
@ -145,6 +145,50 @@ class ModelWithUnusedSubModulesForTest(nn.Module):
        return self.linear4(self.linear3(self.batchnorm(self.linear2(self.linear1(x)))))


+# To test dispatch with tied weights
+class SubModule(torch.nn.Module):
+    def __init__(self, ref_to_parameter):
+        super().__init__()
+        self.parameter = ref_to_parameter
+
+    def forward(self, x):
+        return x + torch.max(self.parameter)
+
+
+class LinearModuleAndSubModule(torch.nn.Linear):
+    def __init__(self, in_features, out_features, name):
+        super().__init__(in_features, out_features, bias=False)
+        print("init weights")
+        self.name = name
+        self.weight_submodule = SubModule(self.weight)
+        self.weight_submodule2 = SubModule(self.weight)
+        self.weight_submodule3 = SubModule(self.weight)
+        self.weight_submodule4 = SubModule(self.weight)
+
+    def forward(self, x):
+        print("weight")
+        print(self.weight)
+        print("name")
+        print(self.name)
+        a = torch.nn.functional.linear(self.weight_submodule(x), self.weight)
+        b = torch.nn.functional.linear(self.weight_submodule2(x), self.weight)
+        c = torch.nn.functional.linear(self.weight_submodule3(x), self.weight)
+        d = torch.nn.functional.linear(self.weight_submodule4(x), self.weight)
+        return a + b + c + d
+
+
+class ModelWithSubmodules(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.module1 = LinearModuleAndSubModule(5000, 5000, "1")
+        self.module2 = LinearModuleAndSubModule(5000, 5000, "2")
+
+    def forward(self, x):
+        a = self.module1(x)
+        b = self.module2(x)
+        return a + b
+
+
 class BigModelingTester(unittest.TestCase):
    def test_init_empty_weights(self):
        # base use
@ -490,42 +534,8 @@ class BigModelingTester(unittest.TestCase):

        torch.cuda.empty_cache()  # Needed in case we run several tests in a row.

-        class SubModule(torch.nn.Module):
-            def __init__(self, ref_to_parameter):
-                super().__init__()
-                self.parameter = ref_to_parameter
-
-            def forward(self, x):
-                return x + torch.max(self.parameter)
-
-        class LinearModuleAndSubModule(torch.nn.Linear):
-            def __init__(self, in_features, out_features):
-                super().__init__(in_features, out_features, bias=False)
-                self.weight_submodule = SubModule(self.weight)
-                self.weight_submodule2 = SubModule(self.weight)
-                self.weight_submodule3 = SubModule(self.weight)
-                self.weight_submodule4 = SubModule(self.weight)
-
-            def forward(self, x):
-                a = torch.nn.functional.linear(self.weight_submodule(x), self.weight)
-                b = torch.nn.functional.linear(self.weight_submodule2(x), self.weight)
-                c = torch.nn.functional.linear(self.weight_submodule3(x), self.weight)
-                d = torch.nn.functional.linear(self.weight_submodule4(x), self.weight)
-                return a + b + c + d
-
-        class ModelWithSubmodules(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-                self.compute = LinearModuleAndSubModule(5000, 5000)
-                self.compute1 = LinearModuleAndSubModule(5000, 5000)
-
-            def forward(self, x):
-                a = self.compute(x)
-                b = self.compute1(x)
-                return a + b
-
        # We should need only 2 * 5000 * 5000 * 32 // 8 * 1e-6 = 200 MB on the device 0 for the whole model forward, and not 600 MB.
-        device_map = {"compute": 0, "compute1": "disk"}
+        device_map = {"module1": 0, "module2": "disk"}

        model = ModelWithSubmodules()

@ -545,7 +555,13 @@ class BigModelingTester(unittest.TestCase):

        free_memory_bytes_before_dispatch = torch.cuda.mem_get_info("cuda:0")[0]
        with TemporaryDirectory() as tmp_dir:
+            print("before dispatch")
+            print(model.module1.weight)
+            print(model.module2.weight)
            dispatch_model(model, device_map, offload_dir=tmp_dir)
+            print("after dispatch")
+            print(model.module1.weight)
+            print(model.module2.weight)
            free_memory_bytes_after_dispatch = torch.cuda.mem_get_info("cuda:0")[0]

            assert (free_memory_bytes_after_dispatch - free_memory_bytes_before_dispatch) * 1e-6 < 130
@ -559,7 +575,6 @@ class BigModelingTester(unittest.TestCase):
                    )
                except Exception as e:
                    raise e
-
            assert torch.allclose(expected, output.cpu(), atol=1e-5)

            torch.cuda.empty_cache()
@ -568,16 +583,16 @@ class BigModelingTester(unittest.TestCase):

            # Check that we have no more references on GPU for the offloaded tied weight.
            n_non_empty = 0
-            for pointer, pointer_dict in model.compute1.weight_submodule._hf_hook.tied_params_map.items():
+            for pointer, pointer_dict in model.module1.weight_submodule._hf_hook.tied_params_map.items():
                if len(pointer_dict) > 0:
                    n_non_empty += 1
-            assert n_non_empty == 1  # `compute` layer one.
+            assert n_non_empty == 1  # `module1` layer one.

            n_non_empty = 0
-            for pointer, pointer_dict in model.compute1._hf_hook.tied_params_map.items():
+            for pointer, pointer_dict in model.module1._hf_hook.tied_params_map.items():
                if len(pointer_dict) > 0:
                    n_non_empty += 1
-            assert n_non_empty == 1  # `compute` layer one.
+            assert n_non_empty == 1  # `module1` layer one.

            assert (free_memory_bytes_after_infer - free_memory_bytes_after_dispatch) * 1e-6 < 130
Author	SHA1	Message	Date
Marc Sun	7f48cb52e7	bug	2024-05-15 16:46:44 +02:00
Marc Sun	e33aba7371	testing	2024-05-15 16:45:38 +02:00
Marc Sun	068d586938	tied_param	2024-05-15 16:30:30 +02:00
Marc Sun	76043b402f	tied map	2024-05-15 16:17:17 +02:00
Marc Sun	3126992054	more	2024-05-15 15:55:46 +02:00
Marc Sun	656e15e4f8	more	2024-05-15 15:41:34 +02:00
Marc Sun	1b21f9a630	test	2024-05-15 15:32:04 +02:00
Marc Sun	f592aad8df	test	2024-05-15 15:21:51 +02:00
Marc Sun	b69239577f	more debug	2024-05-15 15:11:27 +02:00
Marc Sun	f973f0d5f9	debug	2024-05-15 14:34:22 +02:00
Marc Sun	56580b40c5	only run big modeling test	2024-05-15 14:04:07 +02:00
Marc Sun	30ac26cf33	debug tests	2024-05-15 13:59:52 +02:00