Goodbye Torch 1.8 (#3082)

* bump torch18 -> torch19

* fix gptj

---------

Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
2025-10-20 15:33:51 +08:00 · 2023-03-23 11:43:28 -07:00
parent 5c2a81c2c1
commit 5cdf35935d
5 changed files with 13 additions and 10 deletions
--- a/.github/workflows/nv-torch19-p40.yml
+++ b/.github/workflows/nv-torch19-p40.yml
@ -1,4 +1,4 @@
-name: nv-torch18-p40
+name: nv-torch19-p40

 on:
  push:
@ -26,7 +26,7 @@ jobs:

      - name: Install pytorch
        run: |
-          pip install torch==1.8.2+cu111 torchvision==0.9.2+cu111 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
+          pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

@ -52,4 +52,4 @@ jobs:
        run: |
          if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
          cd tests
-          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --forked -n 4 unit/ --torch_ver="1.8" --cuda_ver="11.1"
+          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --forked -n 4 unit/ --torch_ver="1.9" --cuda_ver="11.1"
--- a/.github/workflows/nv-torch19-v100.yml
+++ b/.github/workflows/nv-torch19-v100.yml
@ -1,4 +1,4 @@
-name: nv-torch18-v100
+name: nv-torch19-v100

 on:
  push:
@ -28,7 +28,7 @@ jobs:

      - name: Install pytorch
        run: |
-          pip install torch==1.8.2+cu111 torchvision==0.9.2+cu111 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
+          pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

@ -55,5 +55,5 @@ jobs:
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          if [[ -d ./torch-extensions ]]; then rm -rf ./torch-extensions; fi
          cd tests
-          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --forked -n 4  unit/ --torch_ver="1.8" --cuda_ver="11"
-          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --forked -m 'sequential' unit/ --torch_ver="1.8" --cuda_ver="11"
+          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --forked -n 4  unit/ --torch_ver="1.9" --cuda_ver="11"
+          TORCH_EXTENSIONS_DIR=./torch-extensions pytest --forked -m 'sequential' unit/ --torch_ver="1.9" --cuda_ver="11"
--- a/.github/workflows/nv-transformers-v100.yml
+++ b/.github/workflows/nv-transformers-v100.yml
@ -26,7 +26,7 @@ jobs:

      - name: Install pytorch
        run: |
-          pip install torch==1.8.2+cu111 torchvision==0.9.2+cu111 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
+          pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

--- a/README.md
+++ b/README.md
@ -122,7 +122,7 @@ dynamically link them at runtime.

 ## Requirements
 * [PyTorch](https://pytorch.org/) must be installed _before_ installing DeepSpeed.
-* For full feature support we recommend a version of PyTorch that is >= 1.8 and ideally the latest PyTorch stable release.
+* For full feature support we recommend a version of PyTorch that is >= 1.9 and ideally the latest PyTorch stable release.
 * A CUDA or ROCm compiler such as [nvcc](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#introduction) or [hipcc](https://github.com/ROCm-Developer-Tools/HIPCC) used to compile C++/CUDA/HIP extensions.
 * Specific GPUs we develop and test against are listed below, this doesn't mean your GPU will not work if it doesn't fall into this category it's just DeepSpeed is most well tested on the following:
  * NVIDIA: Pascal, Volta, Ampere, and Hopper architectures
--- a/deepspeed/model_implementations/transformers/ds_transformer.py
+++ b/deepspeed/model_implementations/transformers/ds_transformer.py
@ -114,10 +114,13 @@ class DeepSpeedTransformerInference(nn.Module):
            # TODO(arashb): 'layer_head_mask' and 'past_key_value' are only added to satisfy the OPT models API.
            # This needs to be redesigned later!
            layer_head_mask=None,
-            past_key_value=None):
+            past_key_value=None,
+            **kwargs):

        if x is not None:
            input = x
+        if "hidden_states" in kwargs:
+            input = kwargs["hidden_states"]

        input_mask = (input_mask if attn_mask is None else
                      attn_mask) if attention_mask is None else attention_mask