Compare commits

...

12 Commits

Author SHA1 Message Date
39901f2295 Fix lower precision check for MKLDNN on Windows (#122645)
Fixes #120788
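
For reference, a minimal sketch of how the affected checks surface from Python. The helpers `torch.ops.mkldnn._is_mkldnn_bf16_supported` / `_is_mkldnn_fp16_supported` are an assumption here (they mirror the `mkldnn_bf16_device_check()` / `mkldnn_fp16_device_check()` functions patched in the header diff below):

```
import torch

# The onednn (MKLDNN) backend itself:
print(torch.backends.mkldnn.is_available())

# Assumed internal helpers wrapping the patched C++ checks; on Windows x64
# they previously reported False because the #if only tested __x86_64__,
# which MSVC does not define (it defines _M_X64 instead).
print(torch.ops.mkldnn._is_mkldnn_bf16_supported())
print(torch.ops.mkldnn._is_mkldnn_fp16_supported())
```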

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121618
Approved by: https://github.com/xuhancn, https://github.com/jgong5, https://github.com/mingfeima, https://github.com/seemethere

(cherry picked from commit 03717430cc54609189cc7df593b2c96a99fb7f55)

Co-authored-by: CaoE <e.cao@intel.com>
2024-03-25 17:33:04 -04:00
9e6f42d369 Pin protobuf to 3.20.2 on macOS (#121918) (#122207)
The newer protobuf 5.26.0, released on March 13th, is causing failures in `test_hparams_*` from `test_tensorboard`, in which the stringified metadata is wrong when escaping double quotes. For example, 3bc2bb6781. This looks like an upstream issue in TensorBoard, which doesn't yet work with this brand-new protobuf version: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/pip_package/requirements.txt#L29

The package has already been pinned in the Docker CI requirements (https://github.com/pytorch/pytorch/blob/main/.ci/docker/requirements-ci.txt#L155), so it should be pinned on macOS too. Eventually we want to have just one requirements.txt file.
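
As a quick illustration (not part of the PR), the pin can be sanity-checked at runtime before running the tensorboard tests:

```
# Illustrative only: verify the pinned protobuf version is in effect.
import google.protobuf

assert google.protobuf.__version__ == "3.20.2", google.protobuf.__version__
```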

Fixes https://github.com/pytorch/pytorch/issues/122008
Fixes https://github.com/pytorch/pytorch/issues/121927
Fixes https://github.com/pytorch/pytorch/issues/121946
Pull Request resolved: https://github.com/pytorch/pytorch/pull/121918
Approved by: https://github.com/kit1980

(cherry picked from commit 5f601a41e0a8c91ecf7ca5e4b95d752166ed9093)

Co-authored-by: Huy Do <huydhn@gmail.com>
2024-03-19 11:41:52 -07:00
13a5142f56 Fix MSVC 14.38 - VS 2022 Build (#122120)
Fixes #115922

This PR was prepared to split off the existing https://github.com/pytorch/pytorch/pull/116926 and to apply the suggestions from its review.

`scalar_t`, which is defined as `c10::impl::ScalarTypeToCPPType<ScalarType::Half>::t`, appears to be causing the issue with `Visual Studio 2022 17.8.4` (which ships with `MSVC 14.38.33130`).

Error message:
```
aten\src\ATen/cpu/vec/vec_base.h(150): fatal error C1001: Internal compiler error.
(compiler file 'D:\a_work\1\s\src\vctools\Compiler\CxxFE\sl\p1\c\toinil.c', line 910)
```

---

A related line (the `scalar_t` definition) was previously added as a workaround for a similar issue: [Fix compile error for vs2022](https://github.com/pytorch/pytorch/pull/85958)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/117497
Approved by: https://github.com/ezyang, https://github.com/malfet

(cherry picked from commit fa86fa7a61e7cb85e1d193ed69d41757abe43310)

Co-authored-by: Ozan Aydin <148207261+ozanMSFT@users.noreply.github.com>
2024-03-18 16:47:46 -04:00
c1f8ec5a6f chore: add unit test to verify split_by_tags output_type (#121262) (#122122)
Add a test case as per https://github.com/pytorch/pytorch/pull/120361#issuecomment-1979163324
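
For context, a sketch of the API surface the new test exercises (the module and tags are simplified from the test added in the diff below):

```
import torch
from torch.fx.passes.split_utils import split_by_tags

class M(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x * 0.5)

# Export to a GraphModule, tag every node, then split into one submodule
# per tag. The new test checks that the split module returns the same
# output *type* as the original (a Tensor, not a one-element tuple).
gm = torch.export.export(M(), (torch.randn(2, 2),)).module()
for node in gm.graph.nodes:
    node.tag = "red"  # the real test spreads "red"/"blue"/"green" over nodes
split_gm, fqn_mapping = split_by_tags(gm, ["red"], return_fqn_mapping=True)

x = torch.randn(2, 2)
assert type(M()(x)) == type(split_gm(x))
```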

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121262
Approved by: https://github.com/atalman

(cherry picked from commit 0a1b3be2163ea99633f95c4927bd816eb713e9bd)

Co-authored-by: Dheeraj Peri <peri.dheeraj@gmail.com>
2024-03-18 12:59:48 -07:00
abe172eeaf fix: set codegen in _SplitterBase partitioner (#120361) (#122121)
For graphs with a single output, the expected output type of a torch.export / torch.compile graph module is a single torch.Tensor rather than a tuple.
However, after running the `_SplitterBase` partitioner on such a graph module (obtained from torch.export / torch.compile), the resulting graph module returns a tuple of tensors, in this case `(output,)`.

This PR sets codegen on the graphs produced by the `_SplitterBase` partitioner, which ensures pytree unflatten nodes are added automatically so that single outputs are returned directly.
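
The essence of the change, sketched (names are illustrative; the one-line fix itself is visible in the `split_by_tags` diff below):

```
# `gm` is the original GraphModule, `split_gm` the partitioned one.
# Propagating the original codegen means recompile() re-emits the pytree
# unflattening, so a single-output graph returns a Tensor, not `(output,)`.
split_gm.graph._codegen = gm.graph._codegen
split_gm.recompile()
```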

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120361
Approved by: https://github.com/angelayi

(cherry picked from commit 15add24bf28477843a7e13d9deaa4beb39473900)

Co-authored-by: Dheeraj Peri <peri.dheeraj@gmail.com>
2024-03-18 12:59:39 -07:00
49022c752e Fix missing permission in create release workflow (#118681) (#120518)
Fixes https://github.com/pytorch/pytorch/actions/runs/7715417683/job/21029944543
Pull Request resolved: https://github.com/pytorch/pytorch/pull/118681
Approved by: https://github.com/clee2000, https://github.com/seemethere, https://github.com/atalman, https://github.com/malfet

(cherry picked from commit 48f876143af4920cba34735429fa1f8ba75d42ca)

Co-authored-by: Huy Do <huydhn@gmail.com>
2024-03-15 18:14:06 -07:00
5ba8a77a69 [Release only] Disable triton build workflows (#121934) 2024-03-14 18:30:15 -04:00
da3f59012f [CPP] Update GCC minversion check to 9 or newer (#120126) (#121419)
It's already a requirement for building PyTorch, but it should also be a requirement for linking extensions against it, since a mismatch can lead to runtime crashes: the `std::optional` template layout is incompatible between gcc-9 and older compilers.

Also, update the minimum supported clang version to 9.x (used to build Android), as clang-5 is clearly not C++17 compliant.

Fixes https://github.com/pytorch/pytorch/issues/120020
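
The enforced check is a preprocessor `#error` in the C++ headers (see the diff below); as an illustration only, the equivalent guard expressed in Python:

```
import subprocess

# Illustration, not part of the PR: extensions linked against PyTorch need
# gcc >= 9, since std::optional's template layout changed in gcc-9.
version = subprocess.check_output(["gcc", "-dumpversion"], text=True).strip()
if int(version.split(".")[0]) < 9:
    raise RuntimeError(f"gcc {version} is too old; PyTorch extensions need gcc >= 9")
```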

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120126
Approved by: https://github.com/Skylion007

(cherry picked from commit 3ad067fe2b969d17773e9ada918c67da829bb5cc)

Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
2024-03-13 16:23:04 -07:00
d37ef499da Windows Dynamo Error Removal CI Check (#121026)
Link to landed trunk PR (if applicable):
* https://github.com/pytorch/pytorch/pull/115969

Criteria Category:
* Low risk critical fixes for backwards compatibility
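
In effect (a sketch; `is_inductor_supported` is added by this PR's diff, and `check_if_dynamo_supported` no longer raises on Windows):

```
import torch

# dynamo now works on Windows while inductor still does not, so pick a
# backend accordingly ("aot_eager" is a stock non-inductor backend).
backend = "inductor" if torch._dynamo.is_inductor_supported() else "aot_eager"
compiled = torch.compile(lambda x: x.sin() + 1, backend=backend)
print(compiled(torch.randn(4)))
```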

Approved-by: PaliC, thiagocrepaldi
2024-03-12 12:43:53 -04:00
3184b6f719 [FSDP][StateDict] Allow FULL_STATE_DICT option for 2D (#120837) (#121250)
Fixes #120722

TL;DR for the issue:
Since users are expected to use `get_model_state_dict` for state_dict retrieval, I think it's fine to remove the warning and the RuntimeError.
More context in #120722.
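
The recommended retrieval path mentioned above, sketched (a minimal example; with an FSDP/2D-parallel model the same call returns the properly gathered state dict):

```
import torch
from torch.distributed.checkpoint.state_dict import get_model_state_dict

# Users are expected to go through this helper instead of calling
# model.state_dict() under FSDP.state_dict_type(...).
model = torch.nn.Linear(4, 4)  # stands in for an FSDP/2D-parallel model
state_dict = get_model_state_dict(model)
print(list(state_dict.keys()))
```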

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120837
Approved by: https://github.com/Skylion007

Co-authored-by: wz337 <wz337@cornell.edu>
2024-03-08 08:14:19 -05:00
56a20680f0 Fix make triton command on release branch (#121169) (#121229)
Fixes #120044

This should fix the build-from-source instructions on the release branch: https://github.com/pytorch/pytorch#from-source

Please note we are using the /test/ channel for the release here to make sure it works before the actual release is completed.

Test main:
```
make triton
pip3 uninstall -y triton
WARNING: Skipping triton as it is not installed.
Looking in indexes: https://download.pytorch.org/whl/nightly/
Collecting pytorch-triton==3.0.0+a9bc1a3647
  Downloading https://download.pytorch.org/whl/nightly/pytorch_triton-3.0.0%2Ba9bc1a3647-cp310-cp310-linux_x86_64.whl (239.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 239.0/239.0 MB 8.7 MB/s eta 0:00:00
Requirement already satisfied: filelock in /home/atalman/miniconda3/envs/py310/lib/python3.10/site-packages (from pytorch-triton==3.0.0+a9bc1a3647) (3.13.1)
Installing collected packages: pytorch-triton
  Attempting uninstall: pytorch-triton
    Found existing installation: pytorch-triton 2.2.0
    Uninstalling pytorch-triton-2.2.0:
      Successfully uninstalled pytorch-triton-2.2.0
Successfully installed pytorch-triton-3.0.0+a9bc1a3647
```

Test release/2.2:
```
make triton
pip3 uninstall -y triton
WARNING: Skipping triton as it is not installed.
Looking in indexes: https://download.pytorch.org/whl/test/
Collecting pytorch-triton==2.2.0
  Using cached https://download.pytorch.org/whl/test/pytorch_triton-2.2.0-cp310-cp310-linux_x86_64.whl (183.1 MB)
Requirement already satisfied: filelock in /home/atalman/miniconda3/envs/py310/lib/python3.10/site-packages (from pytorch-triton==2.2.0) (3.13.1)
Installing collected packages: pytorch-triton
  Attempting uninstall: pytorch-triton
    Found existing installation: pytorch-triton 3.0.0+a9bc1a3647
    Uninstalling pytorch-triton-3.0.0+a9bc1a3647:
      Successfully uninstalled pytorch-triton-3.0.0+a9bc1a3647
Successfully installed pytorch-triton-2.2.0
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/121169
Approved by: https://github.com/seemethere
2024-03-07 12:49:44 -05:00
f938615548 Don't use size on TensorVariable when doing out resize test (#121232)
Fixes https://github.com/pytorch/pytorch/issues/120482
Fixes https://github.com/pytorch/pytorch/issues/120511
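
The failing pattern, condensed from the regression test added in the diff below (varying shapes make dynamo hit the dynamic-shape path with a resized graph-input `out=`):

```
import torch

def fn(inputs):
    return torch.outer(**inputs)

compiled = torch.compile(fn, fullgraph=True)
for n in (2, 6, 4):  # shape changes across calls trigger the dynamic path
    args = {
        "input": torch.randn(n),
        "vec2": torch.randn(1),
        "out": torch.empty((n, 1)),
    }
    compiled(args)
```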

Signed-off-by: Edward Z. Yang <ezyang@meta.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120567
Approved by: https://github.com/Skylion007

(cherry picked from commit 0f20cc1e0e474caec9183548e07cbaa5388bcdb3)

Co-authored-by: Edward Z. Yang <ezyang@meta.com>
2024-03-07 11:24:58 -05:00
31 changed files with 204 additions and 376 deletions

View File

@@ -28,3 +28,6 @@ rockset==1.0.3
z3-solver==4.12.2.0
tensorboard==2.13.0
optree==0.9.1
# NB: test_hparams_* from test_tensorboard is failing with protobuf 5.26.0 in
# which the stringify metadata is wrong when escaping double quote
protobuf==3.20.2

View File

@@ -1,307 +0,0 @@
name: Build Triton wheels
on:
push:
branches:
- release/2.2
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
# Release candidate tags look like: v1.11.0-rc1
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
paths:
- .github/workflows/build-triton-wheel.yml
- .github/scripts/build_triton_wheel.py
- .github/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton-rocm.txt
pull_request:
paths:
- .github/workflows/build-triton-wheel.yml
- .github/scripts/build_triton_wheel.py
- .github/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton.txt
- .ci/docker/ci_commit_pins/triton-rocm.txt
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
build-wheel:
name: "Build Triton Wheel"
runs-on: [self-hosted, linux.2xlarge]
strategy:
fail-fast: false
matrix:
py_vers: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
device: ["cuda", "rocm"]
include:
- device: "rocm"
rocm_version: "5.7"
- device: "cuda"
rocm_version: ""
timeout-minutes: 40
env:
DOCKER_IMAGE: ${{ matrix.device == 'rocm' && format('pytorch/manylinux-rocm:{0}', matrix.rocm_version) || 'pytorch/manylinux-builder:cpu' }}
PY_VERS: ${{ matrix.py_vers }}
BUILD_DEVICE: ${{ matrix.device }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
submodules: false
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ env.DOCKER_IMAGE }}
- name: Build Triton wheel
env:
IS_RELEASE_TAG: ${{ startsWith(github.event.ref, 'refs/tags/v') }}
run: |
set -x
mkdir -p "${RUNNER_TEMP}/artifacts/"
container_name=$(docker run \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}:/pytorch" \
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
-w /artifacts/ \
"${DOCKER_IMAGE}" \
)
# Determine python executable for given version
case $PY_VERS in
3.8)
PYTHON_EXECUTABLE=/opt/python/cp38-cp38/bin/python
;;
3.9)
PYTHON_EXECUTABLE=/opt/python/cp39-cp39/bin/python
;;
3.10)
PYTHON_EXECUTABLE=/opt/python/cp310-cp310/bin/python
;;
3.11)
PYTHON_EXECUTABLE=/opt/python/cp311-cp311/bin/python
;;
3.12)
PYTHON_EXECUTABLE=/opt/python/cp312-cp312/bin/python
;;
*)
echo "Unsupported python version ${PY_VERS}"
exit 1
;;
esac
BUILD_ROCM=""
if [[ "$BUILD_DEVICE" == "rocm" ]]; then
BUILD_ROCM="--build-rocm"
fi
RELEASE=""
if [[ "${IS_RELEASE_TAG}" == true ]]; then
RELEASE="--release"
fi
docker exec -t "${container_name}" yum install -y zlib-devel zip
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -m pip install -U setuptools==67.4.0
docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" /pytorch/.github/scripts/build_triton_wheel.py $BUILD_ROCM $RELEASE
docker exec -t "${container_name}" chown -R 1000.1000 /artifacts
- uses: actions/upload-artifact@v3
with:
# NB: Use the same name here and all wheels can be downloaded by referring to the same artifact
name: pytorch-triton-wheel
if-no-files-found: error
path: ${{ runner.temp }}/artifacts/*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()
upload-wheel:
runs-on: ubuntu-22.04
needs: build-wheel
permissions:
id-token: write
contents: read
container:
image: continuumio/miniconda3:4.12.0
environment: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) && 'conda-aws-upload' || '' }}
steps:
- uses: actions/checkout@v3
- name: Configure AWS credentials(PyTorch account) for main
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels
aws-region: us-east-1
- name: Configure AWS credentials(PyTorch account) for RC builds
if: ${{ github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') && !startsWith(github.event.ref, 'refs/tags/ciflow/')) }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_test_build_wheels
aws-region: us-east-1
- name: Download Build Artifacts
uses: actions/download-artifact@v3
with:
name: pytorch-triton-wheel
path: ${{ runner.temp }}/artifacts/
- name: Set DRY_RUN (only for tagged pushes)
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
shell: bash
run: |
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
shell: bash
run: |
set -ex
# reference ends with an RC suffix
if [[ "${GITHUB_REF_NAME}" = *-rc[0-9]* ]]; then
echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
fi
# NB: This step is gated by DRY_RUN, which is enabled everywhere except main and release branches
- name: Upload binaries
env:
PACKAGE_TYPE: wheel
# The UPLOAD_SUBFOLDER needs to be empty here so that triton wheels are uploaded
# to nightly or test
UPLOAD_SUBFOLDER: ""
PKG_DIR: ${{ runner.temp }}/artifacts
shell: bash
run: |
set -ex
bash .circleci/scripts/binary_upload.sh
build-conda:
name: "Build Triton Conda"
runs-on: [self-hosted, linux.2xlarge]
strategy:
fail-fast: false
matrix:
py_vers: [ "3.8", "3.9", "3.10", "3.11" ]
timeout-minutes: 40
env:
DOCKER_IMAGE: pytorch/conda-builder:cpu
PY_VERS: ${{ matrix.py_vers }}
steps:
- name: Setup SSH (Click me for login details)
uses: pytorch/test-infra/.github/actions/setup-ssh@release/2.2
with:
github-secret: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@release/2.2
with:
submodules: false
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Pull Docker image
uses: pytorch/test-infra/.github/actions/pull-docker-image@release/2.2
with:
docker-image: ${{ env.DOCKER_IMAGE }}
- name: Build Triton conda package
env:
IS_RELEASE_TAG: ${{ startsWith(github.event.ref, 'refs/tags/v') }}
run: |
set -x
mkdir -p "${RUNNER_TEMP}/artifacts/"
container_name=$(docker run \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}:/pytorch" \
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
-w /artifacts/ \
"${DOCKER_IMAGE}" \
)
RELEASE=""
if [[ "${IS_RELEASE_TAG}" == true ]]; then
RELEASE="--release"
fi
docker exec -t "${container_name}" yum install -y llvm11 llvm11-devel llvm11-static llvm11-libs zlib-devel
docker exec -t "${container_name}" python /pytorch/.github/scripts/build_triton_wheel.py --build-conda --py-version="${PY_VERS}" $RELEASE
docker exec -t "${container_name}" chown -R 1000.1000 /artifacts
- uses: actions/upload-artifact@v3
with:
# NB: Use the same name here and all wheels can be downloaded by referring to the same artifact
name: pytorch-triton-conda
if-no-files-found: error
path: ${{ runner.temp }}/artifacts/*
- name: Teardown Linux
uses: pytorch/test-infra/.github/actions/teardown-linux@release/2.2
if: always()
upload-conda:
runs-on: ubuntu-22.04
needs: build-conda
container:
image: continuumio/miniconda3:4.12.0
environment: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) && 'conda-aws-upload' || '' }}
steps:
- uses: actions/checkout@v3
- name: Download Build Artifacts
uses: actions/download-artifact@v3
with:
name: pytorch-triton-conda
path: ${{ runner.temp }}/artifacts/
- name: Set DRY_RUN (only for tagged pushes)
if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }}
shell: bash
run: |
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
- name: Set UPLOAD_CHANNEL (only for tagged pushes)
if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }}
shell: bash
run: |
set -ex
# reference ends with an RC suffix
if [[ "${GITHUB_REF_NAME}" = *-rc[0-9]* ]]; then
echo "UPLOAD_CHANNEL=test" >> "$GITHUB_ENV"
fi
# NB: This step is gated by DRY_RUN, which is enabled everywhere except nightly and release branches
- name: Upload binaries to Anaconda
env:
PACKAGE_TYPE: conda
PKG_DIR: ${{ runner.temp }}/artifacts
# When running these on pull_request events these should be blank
CONDA_PYTORCHBOT_TOKEN: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }}
CONDA_PYTORCHBOT_TOKEN_TEST: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }}
shell: bash
run: |
set -ex
if [[ "${UPLOAD_CHANNEL:-nightly}" == "nightly" ]]; then
export ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN}"
else
export ANACONDA_API_TOKEN="${CONDA_PYTORCHBOT_TOKEN_TEST}"
fi
bash .circleci/scripts/binary_upload.sh

View File

@@ -15,6 +15,9 @@ jobs:
if: ${{ github.repository == 'pytorch/pytorch' }}
name: Create Release
runs-on: ubuntu-latest
# https://github.com/softprops/action-gh-release?tab=readme-ov-file#permissions
permissions:
contents: write
steps:
- uses: malfet/checkout@silent-checkout
with:

View File

@@ -158,7 +158,7 @@ They require JetPack 4.2 and above, and [@dusty-nv](https://github.com/dusty-nv)
#### Prerequisites
If you are installing from source, you will need:
- Python 3.8 or later (for Linux, Python 3.8.1+ is needed)
- A compiler that fully supports C++17, such as clang or gcc (especially for aarch64, gcc 9.4.0 or newer is required)
- A compiler that fully supports C++17, such as clang or gcc (gcc 9.4.0 or newer is required)
We highly recommend installing an [Anaconda](https://www.anaconda.com/download) environment. You will get a high-quality BLAS library (MKL) and you get controlled dependency versions regardless of your Linux distro.

View File

@@ -147,9 +147,8 @@ public:
// versions GCC/Clang have buggy determinations on whether or not an
// identifier is odr-used or not, and in any case it's hard to tell if
// a variable is odr-used or not. So best to just cut the problem at the root.
static constexpr size_type size_T = sizeof(T); // Workaround to compile with VS2022.
static constexpr size_type size() {
return VECTOR_WIDTH / size_T;
return VECTOR_WIDTH / sizeof(T);
}
Vectorized() : values{static_cast<T>(0)} {}
Vectorized(T val) {

View File

@@ -97,7 +97,7 @@ constexpr bool mkldnn_bf16_device_check_arm() {
#if AT_MKLDNN_ENABLED()
inline bool mkldnn_bf16_device_check() {
#if defined(__x86_64__)
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
// Use ideep to check bf16 on X64 as cpuinfo has no avx_ne_convert check.
return ideep::has_bf16_type_support();
#else
@@ -106,7 +106,7 @@ inline bool mkldnn_bf16_device_check() {
}
inline bool mkldnn_fp16_device_check() {
#if defined(__x86_64__)
#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
return ideep::has_fp16_type_support();
#else
return false;

View File

@@ -12,14 +12,14 @@
#include <utility>
#if !defined(__clang__) && !defined(_MSC_VER) && defined(__GNUC__) && \
__GNUC__ < 5
__GNUC__ < 9
#error \
"You're trying to build PyTorch with a too old version of GCC. We need GCC 5 or later."
"You're trying to build PyTorch with a too old version of GCC. We need GCC 9 or later."
#endif
#if defined(__clang__) && __clang_major__ < 4
#if defined(__clang__) && __clang_major__ < 9
#error \
"You're trying to build PyTorch with a too old version of Clang. We need Clang 4 or later."
"You're trying to build PyTorch with a too old version of Clang. We need Clang 9 or later."
#endif
#if (defined(_MSC_VER) && (!defined(_MSVC_LANG) || _MSVC_LANG < 201703L)) || \

View File

@@ -1,3 +1,11 @@
#!/bin/bash
# Updates Triton to the pinned version for this copy of PyTorch
pip install --index-url https://download.pytorch.org/whl/nightly/ "pytorch-triton==$(cat .ci/docker/triton_version.txt)+$(head -c 10 .ci/docker/ci_commit_pins/triton.txt)"
BRANCH=$(git rev-parse --abbrev-ref HEAD)
TRITON_VERSION="pytorch-triton==$(cat .ci/docker/triton_version.txt)"
DOWNLOAD_PYTORCH_ORG="https://download.pytorch.org/whl"
if [[ "$BRANCH" =~ .*release.* ]]; then
pip install --index-url ${DOWNLOAD_PYTORCH_ORG}/test/ $TRITON_VERSION
else
pip install --index-url ${DOWNLOAD_PYTORCH_ORG}/nightly/ $TRITON_VERSION+$(head -c 10 .ci/docker/ci_commit_pins/triton.txt)
fi

View File

@@ -313,30 +313,6 @@ class TestFSDPWithDeviceMeshAndDTensor(DTensorTestBase):
with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT):
optim_state_dict = FSDP.optim_state_dict(model, optim)
with self.assertLogs(
"torch.distributed.fsdp._state_dict_utils", level="WARNING"
) as log:
with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT):
state_dict = model.state_dict()
self.assertEqual(len(log.records), 1)
self.assertEqual(len(log.output), 1)
self.assertIn(
"Found both state_dict_type FULL_STATE_DICT and device_mesh.",
log.output[0],
)
with self.assertLogs(
"torch.distributed.fsdp._optim_utils", level="WARNING"
) as log:
with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT):
state_dict = FSDP.optim_state_dict(model, optim)
self.assertEqual(len(log.records), 1)
self.assertEqual(len(log.output), 1)
self.assertIn(
"Found both state_dict_type FULL_STATE_DICT and device_mesh.",
log.output[0],
)
instantiate_parametrized_tests(TestFSDPWithDeviceMeshAndDTensor)
if __name__ == "__main__":

View File

@@ -2324,6 +2324,29 @@ utils_device.CURRENT_DEVICE == None""".split(
self.assertTrue(same(fn(x, y), opt_fn(x.clone(), y.clone())))
self.assertEqual(cnts.frame_count, 1)
def test_out_variants_with_resizing_on_graph_inputs_with_dynamic(self):
# https://github.com/pytorch/pytorch/issues/120482
class CustomModel(torch.nn.Module):
def __init__(self):
super().__init__()
def forward(self, inputs):
return torch.outer(**inputs)
compile_fn = torch.compile(CustomModel(), fullgraph=True)
shapes = [(2, 1), (6, 1), (4, 1)]
for shape in shapes:
vec1, vec2 = shape
input_tensor1 = torch.randn(vec1)
input_tensor2 = torch.randn(vec2)
out_tensor = torch.empty(shape)
args = {"input": input_tensor1, "vec2": input_tensor2, "out": out_tensor}
res = compile_fn(args)
opt_res = res.clone() # cuz this is out and we mutate it
res = CustomModel()(args)
self.assertEqual(res, opt_res)
def test_dict_mutation_side_effect(self):
def fn(d):
d["c"] = d["a"] + d.pop("b")

View File

@@ -15,9 +15,10 @@ from torch.testing._internal.common_utils import (
parametrize,
run_tests,
TestCase,
IS_WINDOWS
)
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
@unittest.skipIf(not torchdynamo.is_dynamo_supported(), "dynamo doesn't support")
class ExampleTests(TestCase):
# TODO Maybe we should make this tests actually show up in a file?

View File

@@ -23,7 +23,11 @@ from torch._subclasses import FakeTensorMode
from torch.export import Constraint, Dim, export
from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing import FileCheck
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import (
run_tests,
TestCase,
IS_WINDOWS,
)
from torch.utils._pytree import (
LeafSpec,
tree_flatten,
@@ -95,7 +99,7 @@ class TestDynamismExpression(TestCase):
# Being able to export means shape is preserved as static
export(branch_on_shape, inp)
@unittest.skipIf(IS_WINDOWS, "Windows isn't supported for this case")
@unittest.skipIf(not torchdynamo.is_dynamo_supported(), "dynamo isn't support")
class TestExport(TestCase):

View File

@@ -7,7 +7,7 @@ from functorch.experimental import control_flow
from torch._dynamo.eval_frame import is_dynamo_supported
from torch._export import export
from torch._export.pass_base import _ExportPassBase
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import run_tests, TestCase, IS_WINDOWS
@unittest.skipIf(not is_dynamo_supported(), "Dynamo not supported")
@@ -37,6 +37,7 @@ class TestPassInfra(TestCase):
self.assertEqual(new_node.op, old_node.op)
self.assertEqual(new_node.target, old_node.target)
@unittest.skipIf(IS_WINDOWS, "Windows not supported")
def test_cond(self) -> None:
class M(torch.nn.Module):
def __init__(self):

View File

@@ -9,7 +9,7 @@ from typing import List, Set
import operator
import torch
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import run_tests, TestCase, IS_WINDOWS
from torch.testing import FileCheck
from torch._dynamo.eval_frame import is_dynamo_supported
from torch._export import export
@@ -26,6 +26,7 @@ from torch._export.passes.functionalize_side_effectful_ops_pass import (
from functorch.experimental.control_flow import cond
from torch.fx.passes.operator_support import OperatorSupport
from torch.fx.passes.infra.partitioner import Partition
from torch.utils import _pytree as pytree
@@ -274,6 +275,7 @@ class TestPasses(TestCase):
new_inp = torch.tensor([1, 1, 1, 1])
self.assertEqual(mod(new_inp), ep(new_inp))
@unittest.skipIf(IS_WINDOWS, "Windows not supported")
def test_runtime_assert_inline_constraints_for_cond(self) -> None:
class M(torch.nn.Module):
def __init__(self):

View File

@@ -185,7 +185,7 @@ class TestSerialize(TestCase):
self.assertEqual(node.inputs[3].name, "side")
self.assertEqual(node.inputs[3].arg.as_string, "right")
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
@unittest.skipIf(not torchdynamo.is_dynamo_supported(), "dynamo doesn't support")
class TestDeserialize(TestCase):
def check_graph(self, fn, inputs, dynamic_shapes=None, _check_meta=True) -> None:

View File

@@ -20,7 +20,11 @@ from torch._export.utils import (
)
from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing import FileCheck
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import (
run_tests,
TestCase,
IS_WINDOWS,
)
from torch.utils._pytree import (
LeafSpec,
tree_flatten,
@@ -188,6 +192,7 @@ class TestUnflatten(TestCase):
id(getattr(unflattened_module.sub_net, "2")),
)
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
def test_unflatten_preserve_signature(self):
class NestedChild(torch.nn.Module):
def forward(self, zx, y):

View File

@@ -10,6 +10,7 @@ from torch._export.serde.upgrade import get_target_version, get_upgraders
from torch.testing._internal.common_utils import (
run_tests,
TestCase,
IS_WINDOWS,
)
TEST_UPGRADERS = {
@@ -112,6 +113,7 @@ def div__Scalar_mode_0_3(self: torch.Tensor, other: Any, *, rounding_mode: Opti
custom_op_count = count_op(upgraded.graph, "aten::div__Scalar_mode_0_3")
self.assertEqual(custom_op_count, 1)
@unittest.skipIf(IS_WINDOWS, "Test case not supported on Windows")
def test_div_upgrader_pass_return_new_op_after_retrace(self):
def fn(a: torch.Tensor, b):
return torch.ops.aten.div.Scalar_mode(a, b, rounding_mode='trunc')

View File

@@ -9,7 +9,7 @@ from torch._export import export
from torch._export.verifier import SpecViolationError, Verifier
from torch.export.exported_program import InputKind, InputSpec, TensorArgument
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import run_tests, TestCase, IS_WINDOWS
@unittest.skipIf(not is_dynamo_supported(), "dynamo isn't supported")
class TestVerifier(TestCase):
@@ -50,6 +50,7 @@ class TestVerifier(TestCase):
with self.assertRaises(SpecViolationError):
verifier.check(ep)
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
def test_verifier_higher_order(self) -> None:
def f(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
def true_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
@@ -67,6 +68,7 @@ class TestVerifier(TestCase):
verifier = Verifier()
verifier.check(ep)
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
def test_verifier_nested_invalid_module(self) -> None:
def f(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
def true_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:

View File

@@ -13,6 +13,7 @@ from torch.testing._internal.common_utils import (
run_tests,
IS_ARM64,
IS_MACOS,
IS_WINDOWS,
IS_X86,
compare_equal_outs_and_grads,
outs_and_grads,
@@ -2940,6 +2941,7 @@ class <lambda>(torch.nn.Module):
):
aot_export_module(mod, [inp], trace_joint=True, output_loss_index=1)
@unittest.skipIf(IS_WINDOWS, "Windows isn't supported for this case")
@unittest.skipIf(not torch._dynamo.is_dynamo_supported(), "Cond needs dynamo to run")
def test_aot_export_with_torch_cond(self):
class M(torch.nn.Module):

View File

@@ -8,7 +8,7 @@ import torch.utils._pytree as pytree
from functorch.experimental import control_flow
from functorch.experimental.control_flow import UnsupportedAliasMutationException, cond
from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import run_tests, TestCase, IS_WINDOWS
from torch.testing._internal.common_quantization import skipIfNoDynamoSupport
from torch._subclasses.functional_tensor import FunctionalTensor
@@ -77,7 +77,7 @@ class ReduceMod(torch.nn.Module):
return self._reduce(*operands)
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
@skipIfNoDynamoSupport
class TestControlFlow(TestCase):
def setUp(self):
@@ -250,6 +250,7 @@ class TestControlFlow(TestCase):
self.assertEqual(true_outs, fake_outs)
@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test")
@skipIfNoDynamoSupport
class TestControlFlowTraced(TestCase):
def setUp(self):

View File

@@ -145,3 +145,76 @@ class TestSplitByTags(TestCase):
},
f"{orig_to_split_fqn_mapping=}",
)
class TestSplitOutputType(TestCase):
class TestModule(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv = torch.nn.Conv2d(3, 16, 3, stride=1, bias=True)
self.relu = torch.nn.ReLU()
def forward(self, x):
conv = self.conv(x)
conv = conv * 0.5
relu = self.relu(conv)
return relu
@staticmethod
def trace_and_tag(
module: torch.nn.Module, inputs: torch.Tensor, tags: List[str]
) -> Tuple[torch.fx.GraphModule, Dict[str, List[str]]]:
"""
Test simple gm consists of nodes with tag (only show call_module nodes here):
conv - tag: "red"
mul - tag: "blue"
relu - tag: "green"
At the beginning we have:
gm:
conv
mul
relu
split_gm = split_by_tags(gm, tags)
Then we have:
split_gm:
red:
conv
blue:
mul
green:
relu
"""
tag_node = defaultdict(list)
gm: torch.fx.GraphModule = torch.export.export(module, (inputs,)).module()
# Add tag to all nodes and build dictionary record tag to call_module nodes
for node in gm.graph.nodes:
if "conv" in node.name:
node.tag = tags[0]
tag_node[tags[0]].append(node.name)
elif "mul" in node.name:
node.tag = tags[1]
tag_node[tags[1]].append(node.name)
else:
node.tag = tags[2]
if node.op == "call_module":
tag_node[tags[2]].append(node.name)
return gm, tag_node
def test_split_by_tags(self) -> None:
tags = ["red", "blue", "green"]
module = TestSplitOutputType.TestModule()
inputs = torch.randn((1, 3, 224, 224))
gm, tag_node = TestSplitOutputType.trace_and_tag(module, inputs, tags)
split_gm, orig_to_split_fqn_mapping = split_by_tags(
gm, tags, return_fqn_mapping=True
)
gm_output = module(inputs)
split_gm_output = split_gm(inputs)
self.assertTrue(type(gm_output) == type(split_gm_output))
self.assertTrue(torch.equal(gm_output, split_gm_output))

View File

@@ -14,7 +14,7 @@ from torch.testing._internal.common_quantization import (
NodeSpec as ns,
QuantizationTestCase,
skipIfNoX86,
skipIfNoDynamoSupport,
skipIfNoInductorSupport,
)
from torch.testing._internal.common_quantized import override_quantized_engine
from enum import Enum
@@ -321,7 +321,7 @@ class X86InductorQuantTestCase(QuantizationTestCase):
)
return export_model, prepare_model, convert_model
@skipIfNoDynamoSupport
@skipIfNoInductorSupport
class TestQuantizePT2EX86Inductor(X86InductorQuantTestCase):
@skipIfNoX86
def test_conv2d(self):

View File

@@ -1,13 +1,19 @@
# Owner(s): ["oncall: pt2"]
import tempfile
import unittest
import torch
from torch._prims.debug_prims import load_tensor_reader
from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode
from torch.multiprocessing.reductions import StorageWeakRef
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import run_tests, skipIfRocm, TestCase
from torch.testing._internal.common_utils import (
IS_WINDOWS,
run_tests,
skipIfRocm,
TestCase,
)
from torch.utils._content_store import (
ContentStoreReader,
ContentStoreWriter,
@@ -15,6 +21,7 @@ from torch.utils._content_store import (
)
@unittest.skipIf(IS_WINDOWS, "Test case not supported on Windows")
class TestContentStore(TestCase):
def test_basic(self, device):
# setup test data

View File

@@ -21,6 +21,7 @@ from .eval_frame import (
explain,
export,
is_dynamo_supported,
is_inductor_supported,
optimize,
optimize_assert,
OptimizedModule,

View File

@@ -1,8 +1,15 @@
# mypy: ignore-errors
import sys
from torch._dynamo import register_backend
@register_backend
def inductor(*args, **kwargs):
if sys.platform == "win32":
raise RuntimeError("Windows not yet supported for inductor")
# do import here to avoid loading inductor into memory when it is not used
from torch._inductor.compile_fx import compile_fx

View File

@@ -698,8 +698,6 @@ class _NullDecorator(contextlib.nullcontext): # type: ignore[type-arg]
def check_if_dynamo_supported():
if sys.platform == "win32":
raise RuntimeError("Windows not yet supported for torch.compile")
if sys.version_info >= (3, 12):
raise RuntimeError("Python 3.12+ not yet supported for torch.compile")
@@ -712,6 +710,21 @@ def is_dynamo_supported():
return False
def check_if_inductor_supported():
check_if_dynamo_supported()
if sys.platform == "win32":
raise RuntimeError("Windows not yet supported for inductor")
def is_inductor_supported():
try:
check_if_inductor_supported()
return True
except Exception:
return False
def optimize(
backend="inductor",
*,

View File

@@ -575,18 +575,18 @@ Either create the tensor outside the compiled region, or do not set the tensor t
tx.symbolic_locals[name] = tensor_variable.items[idx]
elif isinstance(tensor_variable, TensorVariable):
assert isinstance(kwargs["out"], TensorVariable)
assert "example_value" in kwargs["out"].proxy.node.meta
fake_tensor = tensor_variable.proxy.node.meta["example_value"]
fake_out = kwargs["out"].proxy.node.meta["example_value"]
if (
kwargs["out"].source
and kwargs["out"] in tx.output.graphargs
and kwargs["out"].size != tensor_variable.size
and fake_out.shape != fake_tensor.shape
):
# It's hard to get out variants with resizing on graph inputs work
# properly across dynamo/aot/inductor, just fall back.
unimplemented("out variants with resizing on graph inputs")
assert "example_value" in kwargs["out"].proxy.node.meta
if not torch._prims_common.is_contiguous(
kwargs["out"].proxy.node.meta["example_value"]
):
if not torch._prims_common.is_contiguous(fake_out):
# It's difficult to handle strides correctly in functionalization
# when calling an out= op with a non-contiguous out argument
unimplemented(

View File

@@ -2083,10 +2083,5 @@ def _set_optim_use_dtensor(
"DeviceMesh is not compatible with LOCAL_STATE_DICT.",
"Please set state_dict_type to SHARDED_STATE_DICT to get DTensor state_dict.",
)
elif state_dict_type == StateDictType.FULL_STATE_DICT:
logger.warning(
"Found both state_dict_type FULL_STATE_DICT and device_mesh. " # noqa: G004
"Please set state_dict_type to SHARDED_STATE_DICT to get DTensor state_dict."
)
else:
state_dict_settings.optim_state_dict_config._use_dtensor = True

View File

@@ -292,11 +292,6 @@ def _full_pre_state_dict_hook(
"""
if getattr(fsdp_state, "_device_mesh", False):
parent_mesh = _mesh_resources.get_parent_mesh(fsdp_state._device_mesh)
if parent_mesh:
raise RuntimeError(
f"Found FSDP's device_mesh {fsdp_state._device_mesh} has a parent device_mesh {parent_mesh}.",
"We do not support FULL_STATE_DICT for 2D FSDP + TP. Please use FSDP SHARDED_STATE_DICT instead.",
)
_common_pre_state_dict_hook(module, fsdp_state)
_common_unshard_pre_state_dict_hook(
@@ -804,11 +799,6 @@ def _set_use_dtensor(fsdp_state: _FSDPState) -> None:
"DeviceMesh is not compatible with LOCAL_STATE_DICT.",
"Please set state_dict_type to SHARDED_STATE_DICT to get DTensor state_dict.",
)
elif state_dict_type == StateDictType.FULL_STATE_DICT:
logger.warning(
"Found both state_dict_type FULL_STATE_DICT and device_mesh. " # noqa: G004
"Please set state_dict_type to SHARDED_STATE_DICT to get DTensor state_dict."
)
else:
fsdp_state._state_dict_config._use_dtensor = True

View File

@@ -283,6 +283,7 @@ def split_by_tags(
main_g.output(map_arg(output_node.args[0], main_remapping.__getitem__))
main_root = HolderModule({comp.name: comp.gm for comp in all_components})
main_g._codegen = gm.graph._codegen
# If the output nodes consumes get_attr directly in the original graph,
# then we need to make sure get_attr is copied to the new graph.

View File

@@ -404,6 +404,22 @@ def skipIfNoDynamoSupport(fn):
fn(*args, **kwargs)
return wrapper
def skipIfNoInductorSupport(fn):
reason = "inductor doesn't support."
if isinstance(fn, type):
if not torchdynamo.is_inductor_supported():
fn.__unittest_skip__ = True
fn.__unittest_skip_why__ = reason
return fn
@functools.wraps(fn)
def wrapper(*args, **kwargs):
if not torchdynamo.is_inductor_supported():
raise unittest.SkipTest(reason)
else:
fn(*args, **kwargs)
return wrapper
try:
import torchvision # noqa: F401
HAS_TORCHVISION = True