Fix test_randperm_device_compatibility for 1 GPU (#59484 ) (#59502 )

Summary: Do not try to create tensors on 2nd device if device_count() == 1 Fixes #{issue number} Pull Request resolved: https://github.com/pytorch/pytorch/pull/59484 Reviewed By: ngimel Differential Revision: D28910673 Pulled By: malfet fbshipit-source-id: e3517f31a463dd049ce8a5155409b7b716c8df18
Move CUDA async warning to suffix (#59467 ) (#59501 )
2025-10-29 19:24:55 +08:00 · 2021-06-04 20:01:02 -07:00 · 2021-06-04 20:00:55 -07:00 · 2021-06-04 20:00:45 -07:00 · 2021-06-04 18:34:39 -07:00 · 2021-06-04 11:15:58 -07:00
23313 changed files with 1146316 additions and 2950053 deletions
--- a/.azure_pipelines/build-pipeline.yml
+++ b/.azure_pipelines/build-pipeline.yml
@ -0,0 +1,63 @@
 # PyTorch CI Builds Pipeline on Azure DevOps
 #
 # This pipeline:
 #   1) builds PyTorch on select configurations
 #   2) runs only TestTorch unit tests.
 #
 # Every job below passes build_stage and is_ci_build to its job template;
 # in the templates that combination builds from source in develop mode and
 # then runs test_torch.py TestTorch.
 stages:
 - stage: 'Build'
  displayName: 'Build PyTorch'
  jobs:
  # Ubuntu CPU job; each matrix entry names the container image it runs in
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_CPU_docker
      pool: 'PyTorch-Linux-CPU'
      container_endpoint: pytorchms.azurecr.io
      build_stage: True
      is_ci_build: True
      os: ubuntu
      cuda: cpu
      customMatrixes:
        Py_38:
          configuration: ubuntu_1804_py_38_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
  # Ubuntu GPU job; CUDA_VERSION is a per-matrix variable
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: 'PyTorch-Linux-GPU'
      container_endpoint: pytorchms.azurecr.io
      build_stage: True
      is_ci_build: True
      os: ubuntu
      cuda: gpu
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
          CUDA_VERSION: 112
  # Windows CPU job; no container, the configuration value names the conda
  # environment created by prepare-build-template.yml
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_CPU
      pool: 'PyTorch-Win-CPU'
      build_stage: True
      is_ci_build: True
      os: windows
      cuda: cpu
      customMatrixes:
        Py_37:
          configuration: windows_2019_py_37_cpu
  # Windows GPU job
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_GPU
      pool: 'PyTorch-Win-GPU'
      build_stage: True
      is_ci_build: True
      os: windows
      cuda: gpu
      customMatrixes:
        Py_38_CUDA_102_cuDNN_765:
          configuration: windows_2019_py_38_cuda_102_cudnn_765
          CUDA_VERSION: 102
--- a/.azure_pipelines/daily-pipeline.yml
+++ b/.azure_pipelines/daily-pipeline.yml
@ -0,0 +1,82 @@
 # PyTorch Daily Builds Pipeline on Azure DevOps
 #
 # This pipeline:
 #   1) builds PyTorch on all available configurations
 #   2) runs all PyTorch unit tests
 #
 # Every job below passes build_stage and is_daily_build to its job template;
 # in the templates that combination builds from source in develop mode and
 # then runs test/run_test.py.
 stages:
 - stage: 'BuildTest'
  displayName: 'Build and Test PyTorch'
  jobs:
  # Ubuntu CPU job; one matrix entry per container image
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_CPU_docker
      pool: 'PyTorch-Linux-CPU'
      container_endpoint: pytorchms.azurecr.io
      build_stage: True
      is_daily_build: True
      os: ubuntu
      cuda: cpu
      customMatrixes:
        Py_38:
          configuration: ubuntu_1804_py_38_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
        Py_37:
          configuration: ubuntu_1804_py_37_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
  # Ubuntu GPU job; CUDA_VERSION is a per-matrix variable
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: 'PyTorch-Linux-GPU'
      container_endpoint: pytorchms.azurecr.io
      build_stage: True
      is_daily_build: True
      os: ubuntu
      cuda: gpu
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_810:
          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_765:
          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
          CUDA_VERSION: 101
  # Windows CPU job; the configuration value names the conda environment
  # created by prepare-build-template.yml
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_CPU
      pool: 'PyTorch-Win-CPU'
      build_stage: True
      is_daily_build: True
      os: windows
      cuda: cpu
      customMatrixes:
        Py_38:
          configuration: windows_2019_py_38_cpu
        Py_37:
          configuration: windows_2019_py_37_cpu
  # Windows GPU job
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_GPU
      pool: 'PyTorch-Win-GPU'
      build_stage: True
      is_daily_build: True
      os: windows
      cuda: gpu
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: windows_2019_py_39_cuda_112_cudnn_810
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_765:
          configuration: windows_2019_py_38_cuda_102_cudnn_765
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_764:
          configuration: windows_2019_py_37_cuda_101_cudnn_764
          CUDA_VERSION: 101
--- a/.azure_pipelines/job_templates/build-verify-publish-template-unix.yml
+++ b/.azure_pipelines/job_templates/build-verify-publish-template-unix.yml
@ -0,0 +1,134 @@
 # PyTorch build steps template with Unix images Azure DevOps Instances
 #
 # This build depends on 3 parameters set as environment variables in the pipeline:
 #   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
 #   - AZURE_DEVOPS_ARTIFACTS_ORGANIZATION: Azure Artifacts Organization name to publish artifacts
 #   - AZURE_DEVOPS_ARTIFACTS_PROJECT: Azure Artifacts Project name to publish artifacts
 #
 # Template parameters:
 #   - name, pool, container_endpoint: job name, agent pool and container registry endpoint
 #   - os, cuda: forwarded to set-environment-variables.yml
 #   - is_ci_build / is_daily_build / is_official_build: select which build variant runs
 #   - build_stage / verify_stage / publish_stage: select which groups of steps are emitted
 #   - customMatrixes: mapping of matrix entries inserted under strategy.matrix
 parameters:
  name: ''
  pool: ''
  container_endpoint: ''
  os: ''
  cuda: ''
  is_ci_build: False
  is_official_build: False
  is_daily_build: False
  build_stage: False
  verify_stage: False
  publish_stage: False
  customMatrixes: ''
 jobs:
 - job: ${{parameters.name}}
  timeoutInMinutes: 300
  strategy:
    matrix:
      # Splice the caller-supplied matrix entries in as the matrix itself
      ${{ insert }}: ${{parameters.customMatrixes}}
  pool:
    name: ${{ parameters.pool}}
  variables:
    # NOTE(review): presumably an agent knob controlling percent-decoding of
    # logging commands - confirm against the Azure Pipelines agent docs
    DECODE_PERCENTS: false
  container:
    # Runtime expression: reads the container_image variable of the matrix entry
    image: $[variables['container_image']]
    endpoint: ${{parameters.container_endpoint}}
  steps:
  # Build stage
  # Emitted only when build_stage is 'True'. Within it, CI and daily builds
  # take the develop-mode path, official builds take the install + wheel path.
  - ${{ if eq(parameters.build_stage, 'True') }}:
    # Set up environment variables for specific pipeline build
    - template: set-environment-variables.yml
      parameters:
        os: ${{ parameters.os}}
        cuda: ${{ parameters.cuda}}
        is_official_build: ${{ parameters.is_official_build}}
    # Sync and update PyTorch submodules
    - bash: git submodule update --init --recursive
      displayName: Update PyTorch submodules
    # Build PyTorch and run unit tests - no packaging
    - ${{ if or(eq(parameters.is_ci_build, 'True'), eq(parameters.is_daily_build, 'True')) }}:
      # Build PyTorch from source in develop mode
      - bash: python setup.py develop
        displayName: Build PyTorch from source
      - ${{ if eq(parameters.is_ci_build, 'True') }}:
        # Run TestTorch unit tests to demonstrate successful PyTorch build
        - bash: python test/test_torch.py TestTorch
          displayName: Run TestTorch unit tests
      - ${{ if eq(parameters.is_daily_build, 'True') }}:
        # Run all unit tests to demonstrate successful PyTorch build
        - bash: python test/run_test.py --continue-through-error --exclude-jit-executor --verbose
          displayName: Run all unit tests
      # Run ComponentGovernance
      # (nested under the CI/daily condition, so official builds skip it)
      - task: ComponentGovernanceComponentDetection@0
        inputs:
          scanType: 'Register'
          verbosity: 'Verbose'
          alertWarningLevel: 'High'
    # Build PyTorch and produce artifacts for verification stage
    - ${{ if eq(parameters.is_official_build, 'True') }}:
      # Build PyTorch from source in install mode and exclude test binaries
      - bash: python setup.py install
        displayName: Build PyTorch from source without test binaries
      # Package PyTorch Wheel
      - bash: python setup.py bdist_wheel
        displayName: Package PyTorch Wheel
      # Publish PyTorch Wheel
      # The artifact name is the one the verify and publish stages download
      - task: PublishPipelineArtifact@1
        inputs:
          targetPath: $(Build.SourcesDirectory)/dist/
          artifactName: Build_$(Build.BuildNumber)_$(configuration)
        displayName: Publish PyTorch Wheel to Pipeline Artifacts
  # Verification stage
  # Emitted only when verify_stage is 'True'.
  - ${{ if eq(parameters.verify_stage, 'True') }}:
    # Download PyTorch Wheel
    - task: DownloadPipelineArtifact@2
      inputs:
        artifact: Build_$(Build.BuildNumber)_$(configuration)
        path: $(Build.SourcesDirectory)/verify
      displayName: Download PyTorch Wheel
    # Install PyTorch Wheel on Linux (matches torch*linux*.whl in the download directory)
    - bash: python -m pip install $(Build.SourcesDirectory)/verify/torch*linux*.whl
      displayName: Install PyTorch Wheel
    # Ensure PyTorch installed correctly from produced wheel
    # The import is run from the verify directory rather than the source root
    - bash: |
        cd $(Build.SourcesDirectory)/verify
        python -c "import torch; print('Installed Torch version: ' + torch.__version__)"
      displayName: Check PyTorch correctly installed from wheel
  # Publishing stage
  # Emitted only when publish_stage is 'True'.
  - ${{ if eq(parameters.publish_stage, 'True') }}:
    # Download PyTorch Wheel
    - task: DownloadPipelineArtifact@2
      inputs:
        artifact: Build_$(Build.BuildNumber)_$(configuration)
        path: $(Build.SourcesDirectory)/publish
      displayName: Download PyTorch Wheel
    # Publish wheel to Azure Artifacts
    # The flag continueOnError=true is needed as the artifact to be published
    # may already exist, because the artifact is differentiated based on the
    # last commit date.
    # The package version is <MAJOR.MINOR.PATCH>-<commit date>-<short hash>.
    # The version prefix is matched with a pattern instead of taking a fixed
    # number of characters, so multi-digit components (e.g. 1.10.0) are kept whole.
    - bash: |
        export TORCH_VERSION=$(grep -oE '^[0-9]+\.[0-9]+\.[0-9]+' ./version.txt)
        export LAST_COMMIT=$(git rev-parse --short HEAD)
        export LAST_COMMIT_DATE=$(git log -1 --pretty=%ad --date=format:%Y%m%d)
        cd $(Build.SourcesDirectory)/publish
        export TORCH_WHEEL=$(echo torch*linux*whl)
        az extension add -n azure-devops
        echo $ADOTOKEN | az devops login
        az artifacts universal publish --organization $AZURE_DEVOPS_ARTIFACTS_ORGANIZATION --project $AZURE_DEVOPS_ARTIFACTS_PROJECT --scope project --feed "PyTorch" --name $TORCH_WHEEL --description "PyTorch Official Build Artifact" --version $TORCH_VERSION-$LAST_COMMIT_DATE-$LAST_COMMIT --path .
      env:
        # The secret is passed in through the step environment and piped to the login command
        ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
      continueOnError: true
      displayName: Upload PyTorch Official Build package to Azure Artifacts
--- a/.azure_pipelines/job_templates/build-verify-publish-template-win.yml
+++ b/.azure_pipelines/job_templates/build-verify-publish-template-win.yml
@ -0,0 +1,150 @@
 # PyTorch build steps template with Windows images Azure DevOps Instances
 #
 # This build depends on 3 parameters set as environment variables in the pipeline:
 #   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
 #   - AZURE_DEVOPS_ARTIFACTS_ORGANIZATION: Azure Artifacts Organization name to publish artifacts
 #   - AZURE_DEVOPS_ARTIFACTS_PROJECT: Azure Artifacts Project name to publish artifacts
 #
 # Template parameters:
 #   - name, pool: job name and agent pool
 #   - os, cuda: forwarded to set-environment-variables.yml
 #   - is_ci_build / is_daily_build / is_official_build: select which build variant runs
 #   - build_stage / verify_stage / publish_stage: select which groups of steps are emitted
 #   - customMatrixes: mapping of matrix entries inserted under strategy.matrix
 parameters:
  name: ''
  pool: ''
  os: ''
  cuda: ''
  is_ci_build: False
  is_official_build: False
  is_daily_build: False
  build_stage: False
  verify_stage: False
  publish_stage: False
  customMatrixes: ''
 jobs:
 - job: ${{parameters.name}}
  timeoutInMinutes: 300
  strategy:
    matrix:
      # Splice the caller-supplied matrix entries in as the matrix itself
      ${{ insert }}: ${{parameters.customMatrixes}}
  pool:
    name: ${{ parameters.pool}}
  variables:
    CMAKE_GENERATOR: Ninja
    PACKAGE_PDBS: 0
  steps:
  # Prepare for PyTorch build on Windows
  # This template is included unconditionally, so it also runs for the
  # verify and publish stages; build_stage only gates its build-only steps.
  - template: prepare-build-template.yml
    parameters:
      configuration: $(configuration)
      build_stage: ${{ parameters.build_stage}}
  # Build Stage
  - ${{ if eq(parameters.build_stage, 'True') }}:
    # Set up environment variables for specific pipeline build
    - template: set-environment-variables.yml
      parameters:
        os: ${{ parameters.os}}
        cuda: ${{ parameters.cuda}}
        is_official_build: ${{ parameters.is_official_build}}
    # Sync and update PyTorch submodules
    - script: git submodule update --init --recursive
      displayName: Update PyTorch submodules
    # Build PyTorch and run unit tests - no packaging
    # Each script step activates the conda environment named by $(configuration) first
    - ${{ if or(eq(parameters.is_ci_build, 'True'), eq(parameters.is_daily_build, 'True')) }}:
      # Build PyTorch from source in develop mode with Ninja
      - script: call activate $(configuration) && python setup.py develop
        displayName: Build PyTorch from source
      - ${{ if eq(parameters.is_ci_build, 'True') }}:
        # Run TestTorch unit tests to demonstrate successful PyTorch build
        - script: call activate $(configuration) && python test\test_torch.py TestTorch
          displayName: Run TestTorch unit tests
      - ${{ if eq(parameters.is_daily_build, 'True') }}:
        # Run all unit tests to demonstrate successful PyTorch build
        - script: call activate $(configuration) && python test/run_test.py --continue-through-error --exclude-jit-executor --verbose
          displayName: Run all unit tests
      # Run ComponentGovernance
      # (nested under the CI/daily condition, so official builds skip it)
      - task: ComponentGovernanceComponentDetection@0
        inputs:
          scanType: 'Register'
          verbosity: 'Verbose'
          alertWarningLevel: 'High'
    # Build PyTorch and produce artifacts for verification stage
    - ${{ if eq(parameters.is_official_build, 'True') }}:
      # Build PyTorch from source in install mode with Ninja and exclude test binaries
      - script: call activate $(configuration) && python setup.py install
        displayName: Build PyTorch from source without test binaries
      # Package PyTorch Wheel
      - script: call activate $(configuration) && python setup.py bdist_wheel
        displayName: Package PyTorch Wheel
      # Publish PyTorch Wheel
      # The artifact name is the one the verify and publish stages download
      - task: PublishPipelineArtifact@1
        inputs:
          targetPath: $(Build.SourcesDirectory)\dist\
          artifactName: Build_$(Build.BuildNumber)_$(configuration)
        displayName: Publish PyTorch Wheel to Pipeline Artifacts
  # Verification Stage
  # Emitted only when verify_stage is 'True'.
  - ${{ if eq(parameters.verify_stage, 'True') }}:
    # Download PyTorch Wheel
    - task: DownloadPipelineArtifact@2
      inputs:
        artifact: Build_$(Build.BuildNumber)_$(configuration)
        path: $(Build.SourcesDirectory)\verify
      displayName: Download PyTorch Wheel
    # Install PyTorch Wheel on Windows
    # The wheel file name is written to whl.txt and read back into %whl%,
    # then that name is handed to pip.
    - script: |
        call activate $(configuration)
        cd $(Build.SourcesDirectory)\verify
        dir torch*win*.whl /b > whl.txt
        set /p whl= < whl.txt
        python -m pip install %whl%
      displayName: Install PyTorch Wheel
    # Ensure PyTorch installed correctly from produced wheel
    # The import is run from the verify directory rather than the source root
    - script: |
        call activate $(configuration)
        cd $(Build.SourcesDirectory)\verify
        python -c "import torch; print('Installed Torch version: ' + torch.__version__)"
      displayName: Check PyTorch correctly installed from wheel
  # Publishing stage
  # Emitted only when publish_stage is 'True'.
  - ${{ if eq(parameters.publish_stage, 'True') }}:
    # Download PyTorch Wheel
    - task: DownloadPipelineArtifact@2
      inputs:
        artifact: Build_$(Build.BuildNumber)_$(configuration)
        path: $(Build.SourcesDirectory)\publish
      displayName: Download PyTorch Wheel
    # Set up Azure Artifacts for Windows
    # The pip install --upgrade command is a bug fix for Azure CLI on Windows
    # More info: https://github.com/Azure/azure-cli/issues/16858
    - script: |
        pip install --upgrade pip --target \opt\az\lib\python3.6\site-packages\
        az extension add -n azure-devops
      displayName: Set up Azure Artifacts download on Windows
    # Publish wheel to Azure Artifacts
    # The flag continueOnError=true is needed as the artifact to be published
    # may already exist, because the artifact is differentiated based on the
    # last commit date.
    # The package version is <first 5 chars of version.txt>-<commit date>-<short hash>.
    # The script body is executed as a batch file, so the percent signs that
    # belong to the git date format are doubled; a single % would be consumed
    # by cmd as a variable reference and the date would be lost.
    # NOTE(review): %TORCH_VERSION:~0,5% assumes a single-digit MAJOR.MINOR.PATCH
    # (e.g. 1.9.0); revisit before a version such as 1.10.0.
    - script: |
        set /p TORCH_VERSION= < version.txt
        cd $(Build.SourcesDirectory)\publish
        git rev-parse --short HEAD > last_commit.txt && set /p LAST_COMMIT= < last_commit.txt
        git log -1 --pretty=%%ad --date=format:%%Y%%m%%d > last_commit_date.txt && set /p LAST_COMMIT_DATE= < last_commit_date.txt
        dir torch*win*.whl /b > whl.txt && set /p TORCH_WHEEL= < whl.txt
        echo %ADOTOKEN% | az devops login
        az artifacts universal publish --organization %AZURE_DEVOPS_ARTIFACTS_ORGANIZATION% --project %AZURE_DEVOPS_ARTIFACTS_PROJECT% --scope project --feed "PyTorch" --name %TORCH_WHEEL% --description "PyTorch Official Build Artifact" --version %TORCH_VERSION:~0,5%-%LAST_COMMIT_DATE%-%LAST_COMMIT% --path .
      env:
        # The secret is passed in through the step environment and piped to the login command
        ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
      continueOnError: true
      displayName: Upload PyTorch Official Build package to Azure Artifacts
--- a/.azure_pipelines/job_templates/common-packages.yml
+++ b/.azure_pipelines/job_templates/common-packages.yml
@ -0,0 +1,17 @@
 # Conda environment definition used for Windows builds.
 # prepare-build-template.yml rewrites this file in place before running
 # `conda env create --file` on it: the placeholder after `python=` is
 # replaced with the Python version derived from the job name.
 dependencies:
  - python=PYTHON_VERSION
  - numpy
  - ninja
  - pyyaml
  - mkl
  - mkl-include
  - setuptools
  - cmake
  - cffi
  - typing_extensions
  - future
  - six
  - requests
  - dataclasses
  # pip-installed requirements, resolved relative to this file's directory
  - pip:
    - -r ../../requirements.txt
--- a/.azure_pipelines/job_templates/prepare-build-template.yml
+++ b/.azure_pipelines/job_templates/prepare-build-template.yml
@ -0,0 +1,62 @@
 # Build prepare steps for PyTorch on Azure DevOps to build from source.
 # These steps are shared between the normal build process and semmle security scan tasks
 parameters:
  build_stage: False
  configuration: ''
 steps:
 # End Python tasks that may be lingering over from previous runs
 # Note: If python.exe isn't currently running, exit code becomes 128,
 # which fails the run. Here exit code is set to 0 to avoid failed run.
 - script: |
    taskkill /f /im python.exe
    IF %ERRORLEVEL% EQU 128 exit 0
  displayName: End previous Python processes
 # Clean up env directory in conda for fresh builds and set up conda environment YAML
 # The Python version is built from the characters at index 3 and 4 of the job
 # name (e.g. Py_38 -> 3.8) and substituted into common-packages.yml in place.
 # NOTE(review): this assumes the job name starts with Py_ followed by one
 # digit each for major and minor; a two-digit minor (3.10) would not fit.
 - powershell: |
    Remove-Item 'C:\Miniconda\envs' -Recurse -ErrorAction Ignore
    $env:PYTHON_VERSION = $env:SYSTEM_JOBNAME.Substring(3,1) + '.' + $env:SYSTEM_JOBNAME.Substring(4,1)
    (Get-Content .azure_pipelines\job_templates\common-packages.yml) -replace 'PYTHON_VERSION', $env:PYTHON_VERSION | Out-File -encoding ASCII .azure_pipelines\job_templates\common-packages.yml
  displayName: Clean up previous environments and Set up conda environment YAML
 # Make conda environment and install required packages
 # The environment is named after $(configuration); later build steps activate it by that name
 - script: |
    call conda clean --all -y
    call conda env create -n $(configuration) --file .azure_pipelines\job_templates\common-packages.yml
    call activate $(configuration)
    call conda install -c conda-forge libuv=1.39
  displayName: Set up conda environment for building from source
 # The remaining steps are emitted only when build_stage is 'True'
 - ${{ if eq(parameters.build_stage, 'True') }}:
  # Install MKL
  # Any previous download is removed first, then the archive is extracted into .\mkl
  # NOTE(review): curl -k skips TLS certificate verification for this
  # download and the ones below - confirm this is intentional.
  - script: |
      rmdir /s /q mkl
      del mkl_2020.2.254.7z
      curl https://s3.amazonaws.com/ossci-windows/mkl_2020.2.254.7z -k -O
      7z x -aoa mkl_2020.2.254.7z -omkl
    displayName: Install MKL
  # Install sccache and randomtemp
  # Related PyTorch GitHub issue: https://github.com/pytorch/pytorch/issues/25393
  # Related fix: https://github.com/pytorch/builder/pull/448/
  # sccache.exe is also copied to nvcc.exe in the same directory.
  # Runs only when the CUDA_VERSION variable is non-empty.
  - script: |
      mkdir .\tmp_bin
      curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output .\tmp_bin\sccache.exe
      curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output .\tmp_bin\sccache-cl.exe
      copy .\tmp_bin\sccache.exe .\tmp_bin\nvcc.exe
      curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.3/randomtemp.exe --output .\tmp_bin\randomtemp.exe
    displayName: Install sccache and randomtemp
    condition: not(eq(variables.CUDA_VERSION, ''))
  # CUDA 11.2's CUB directory conflicts with CUDA 10.2 and 10.1
  # builds, where CUDA 11.2's CUB is injected into non-CUDA
  # 11.2 builds.
  # The CUB directory is removed for every CUDA build and copied back in
  # only when CUDA_VERSION is '112'.
  - powershell: Remove-Item "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include\cub" -Recurse -ErrorAction Ignore
    displayName: Remove conflicting CUB from CUDA installation
    condition: not(eq(variables.CUDA_VERSION, ''))
  - powershell: Copy-Item -Path "F:\cuda_11_2\cub\" -Destination "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include" -Recurse
    displayName: Copy CUDA CUB for CUDA 11.2 build
    condition: eq(variables.CUDA_VERSION, '112')
--- a/.azure_pipelines/job_templates/pytorch-template-unix.yml
+++ b/.azure_pipelines/job_templates/pytorch-template-unix.yml
@ -0,0 +1,51 @@
 # PyTorch build steps template with Unix images Azure DevOps Instances
 #
 # This build depends on 5 parameters set as environment variables in the pipeline:
 #   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
 #   - AZURE_STORAGE_KEY: Secret var for authenticating to Azure Storage
 #   - _TS_CLONE_P, _TS_P, _TS_SM_P: Secret vars for specific unit tests
 parameters:
  name: ''
  pool: ''
  container_endpoint: ''
  customMatrixes: ''
 jobs:
 - job: ${{parameters.name}}
  timeoutInMinutes: 600
  strategy:
    matrix:
      # Splice the caller-supplied matrix entries in as the matrix itself
      ${{ insert }}: ${{parameters.customMatrixes}}
  pool:
    name: ${{ parameters.pool}}
  variables:
    DECODE_PERCENTS: false
  steps:
  # Don't checkout repo contents to save time and CPU compute. Environment variables
  # related to checkout branch such as $(BUILD_SOURCEBRANCH) are still available.
  - checkout: none
  # Delete pytorch_tests repo from previous builds if exists
  - bash: rm -rf pytorch_tests/
    displayName: Delete pytorch_tests repo from previous builds if exists
  # Clone PyTorch Tests repository
  # The token is sent as an HTTP Basic credential with an empty user name:
  # printf receives no argument for %s, so the encoded text is ":<token>".
  - bash: |
      B64_PAT=$(printf "%s"":$_ADOTOKEN" | base64)
      git -c http.extraHeader="Authorization: Basic ${B64_PAT}" clone $(AZURE_DEVOPS_PYTORCH_TESTS_REPO_URL)
      cd pytorch_tests
      git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
    env:
      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
    displayName: Clone PyTorch Tests repo
  # Run PyTorch Unit Tests
  # The secrets are exposed to run.sh only through this step's environment
  - bash: bash $(Build.SourcesDirectory)/pytorch_tests/scripts/linux/run.sh
    env:
      _AZURE_STORAGE_KEY: $(AZURE_STORAGE_KEY)
      _TS_CLONE_P: $(TS_CLONE_PASSWORD)
      _TS_P: $(TS_PAT)
      _TS_SM_P: $(TS_SM_PAT)
    displayName: Run PyTorch Unit Tests
--- a/.azure_pipelines/job_templates/pytorch-template-win.yml
+++ b/.azure_pipelines/job_templates/pytorch-template-win.yml
@ -0,0 +1,49 @@
 # PyTorch build steps template with Windows images Azure DevOps Instances
 #
 # This build depends on 5 parameters set as environment variables in the pipeline:
 #   - AZURE_DEVOPS_CLI_PAT: Secret var for authenticating to Azure DevOps
 #   - AZURE_STORAGE_KEY: Secret var for authenticating to Azure Storage
 #   - _TS_CLONE_P, _TS_P, _TS_SM_P: Secret vars for specific unit tests
 parameters:
  name: ''
  pool: ''
  customMatrixes: ''
 jobs:
 - job: ${{parameters.name}}
  timeoutInMinutes: 600
  strategy:
    matrix:
      # Splice the caller-supplied matrix entries in as the matrix itself
      ${{ insert }}: ${{parameters.customMatrixes}}
  pool:
    name: ${{ parameters.pool}}
  steps:
  # Don't checkout repo contents to save time and CPU compute. Environment variables
  # related to checkout branch such as $(BUILD_SOURCEBRANCH) are still available.
  - checkout: none
  # Delete pytorch_tests repo from previous builds if exists
  - script: if exist "pytorch_tests/" rmdir "pytorch_tests/" /q /s
    displayName: Delete pytorch_tests repo from previous builds if exists
  # Clone PyTorch Tests repository
  # The token is sent as an HTTP Basic credential with an empty user name (":<token>").
  # NOTE(review): the repo URL variable is spelled with a lower-case segment here
  # but all upper-case in the Unix template; presumably equivalent because Windows
  # environment variable names are case-insensitive - confirm.
  - powershell: |
      $env:B64Pat = [Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes(":$env:_ADOTOKEN"))
      git -c http.extraHeader="Authorization: Basic $env:B64Pat" clone $env:AZURE_DEVOPS_pytorch_tests_REPO_URL
      cd pytorch_tests
      git checkout $(PYTORCH_TESTS_CHECKOUT_BRANCH)
    env:
      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
    displayName: Clone PyTorch Tests repo
  # Run PyTorch Unit Tests
  # The secrets are exposed to run.bat only through this step's environment
  - script: call $(Build.SourcesDirectory)\pytorch_tests\scripts\windows\run.bat
    env:
      _ADOTOKEN: $(AZURE_DEVOPS_CLI_PAT)
      _AZURE_STORAGE_KEY: $(AZURE_STORAGE_KEY)
      _TS_CLONE_P: $(TS_CLONE_PASSWORD)
      _TS_P: $(TS_PAT)
      _TS_SM_P: $(TS_SM_PAT)
    displayName: Run PyTorch Unit Tests
--- a/.azure_pipelines/job_templates/set-environment-variables.yml
+++ b/.azure_pipelines/job_templates/set-environment-variables.yml
@ -0,0 +1,131 @@
 # Set environment variables for specific configurations
 #
 # Template parameters:
 #   - is_official_build: when true, also sets the packaging/version variables
 #   - os: selects the Ubuntu or Windows group of steps below
 #   - cuda: 'cpu' or 'gpu', selects the USE_CUDA value and version suffix
 parameters:
  is_official_build: False
  os: ''
  cuda: ''
 steps:
  # Environment configuration steps for Ubuntu builds
  - ${{ if contains(parameters.os, 'ubuntu') }}:
    # Set configuration specific build flags
    - ${{ if eq(parameters.is_official_build, True) }}:
      # The version prefix is matched as MAJOR.MINOR.PATCH instead of taking a
      # fixed number of characters, so multi-digit components (e.g. 1.10.0)
      # are kept whole.
      - bash: |
          echo "##vso[task.setvariable variable=INSTALL_TEST;]0"
          echo "##vso[task.setvariable variable=PYTORCH_BUILD_NUMBER;]1"
          export PYTORCH_VERSION=$(grep -oE '^[0-9]+\.[0-9]+\.[0-9]+' ./version.txt)
          echo "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$PYTORCH_VERSION.dev"
        displayName: Set configuration-specific build flags
      # Set PyTorch CPU/GPU build flags.
      # These append to the PYTORCH_BUILD_VERSION value set by the step above,
      # so they are nested under the official-build condition as well.
      - ${{ if contains(parameters.cuda, 'cpu') }}:
        - bash: |
            echo "##vso[task.setvariable variable=USE_CUDA;]0"
            echo "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cpu"
          displayName: Set CUDA-specific build flag for CPU builds
      - ${{ if contains(parameters.cuda, 'gpu') }}:
        - bash: |
            echo "##vso[task.setvariable variable=USE_CUDA;]1"
            echo "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cu$(CUDA_VERSION)"
          displayName: Set CUDA-specific build flag for GPU builds
    # Set MKL environment variables
    # /opt/intel paths are prepended to whatever the variables already hold
    - bash: |
        echo "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]/opt/intel/lib:$CMAKE_LIBRARY_PATH"
        echo "##vso[task.setvariable variable=CMAKE_INCLUDE_PATH;]/opt/intel/include:$CMAKE_INCLUDE_PATH"
      displayName: Set MKL paths
    # View current environment variables
    - bash:
        printenv
      displayName: Show environment variables
  # Environment configuration steps for Windows builds
  - ${{ if contains(parameters.os, 'windows') }}:
    # Set Conda Lib Path
    # Points at the Library\bin directory of the conda environment named by $(configuration)
    - powershell: Write-Host "##vso[task.setvariable variable=CONDA_LIB_PATH;]C:\Miniconda\envs\$(configuration)\Library\bin"
      displayName: Set Conda Lib Path
    # Set configuration specific build flags
    # NOTE(review): Substring(0,5) assumes a single-digit MAJOR.MINOR.PATCH at the
    # start of version.txt (e.g. 1.9.0); revisit before a version such as 1.10.0.
    - ${{ if eq(parameters.is_official_build, True) }}:
      - powershell: |
          Write-Host "##vso[task.setvariable variable=INSTALL_TEST;]0"
          Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_NUMBER;]1"
          Set-Variable -Name PYTORCH_VERSION -Value (Get-Content .\version.txt).Substring(0,5)
          Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$PYTORCH_VERSION.dev"
        displayName: Set configuration-specific build flags
      # Set PyTorch CPU/GPU build flags.
      # These append to the PYTORCH_BUILD_VERSION value set by the step above,
      # so they are nested under the official-build condition as well.
      - ${{ if contains(parameters.cuda, 'cpu') }}:
        - powershell: |
            Write-Host "##vso[task.setvariable variable=USE_CUDA;]0"
            Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cpu"
          displayName: Set CUDA-specific build flag for CPU build
      - ${{ if contains(parameters.cuda, 'gpu') }}:
        - powershell: |
            Write-Host "##vso[task.setvariable variable=USE_CUDA;]1"
            Write-Host "##vso[task.setvariable variable=PYTORCH_BUILD_VERSION;]$(PYTORCH_BUILD_VERSION).cu$(CUDA_VERSION)"
          displayName: Set CUDA-specific build flag for GPU build
    # Set CUDA 11.2, 10.2 or 10.1 specific build flags
    # Exactly one of the three steps below runs, chosen at runtime by the
    # CUDA_VERSION variable of the matrix entry.
    - ${{ if eq(parameters.cuda, 'gpu') }}:
      - powershell: |
          Write-Host "##vso[task.setvariable variable=TORCH_CUDA_ARCH_LIST;]3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6"
          Write-Host "##vso[task.setvariable variable=CUDA_PATH;]C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\"
        displayName: Set CUDA 11.2 specific build flags
        condition: eq(variables.CUDA_VERSION, '112')
      - powershell: |
          Write-Host "##vso[task.setvariable variable=TORCH_CUDA_ARCH_LIST;]3.7+PTX;5.0;6.0;6.1;7.0;7.5"
          Write-Host "##vso[task.setvariable variable=CUDA_PATH;]C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\"
        displayName: Set CUDA 10.2 specific build flags
        condition: eq(variables.CUDA_VERSION, '102')
      - powershell: |
          Write-Host "##vso[task.setvariable variable=TORCH_CUDA_ARCH_LIST;]3.7+PTX;5.0;6.0;6.1;7.0;7.5"
          Write-Host "##vso[task.setvariable variable=CUDA_PATH;]C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\"
        displayName: Set CUDA 10.1 specific build flags
        condition: eq(variables.CUDA_VERSION, '101')
      # Derive the remaining CUDA/cuDNN locations from the CUDA_PATH chosen above
      # and put the CUDA bin directory at the front of PATH
      - powershell: |
          Write-Host "##vso[task.setvariable variable=CUDA_BIN_PATH;]$env:CUDA_PATH\bin\"
          Write-Host "##vso[task.setvariable variable=CUDNN_ROOT;]$env:CUDA_PATH"
          Write-Host "##vso[task.setvariable variable=CUDNN_INCLUDE_DIR;]$env:CUDA_PATH\include\"
          Write-Host "##vso[task.setvariable variable=CUDNN_LIBRARY;]$env:CUDA_PATH\lib\x64\"
          Write-Host "##vso[task.prependpath]$env:CUDA_PATH\bin"
          Write-Host "##vso[task.setvariable variable=TORCH_NVCC_FLAGS;]-Xfatbin -compress-all --no-host-device-move-forward"
          Write-Host "##vso[task.setvariable variable=THRUST_IGNORE_CUB_VERSION_CHECK;]1"
          Write-Host "##vso[task.setvariable variable=NVTOOLSEXT_PATH;]C:\Program Files\NVIDIA Corporation\NvToolsExt\"
        displayName: Set CUDA environment variables
      # Copy the runtime DLLs into torch\lib in the source tree
      - powershell: |
          copy "$(CUDA_BIN_PATH)\cusparse*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\cublas*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\cudart*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\curand*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\cufft*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\cusolver*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\cudnn*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CUDA_BIN_PATH)\nvrtc*64_*.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" $(Build.SourcesDirectory)\torch\lib
          copy "$(CONDA_LIB_PATH)\libiomp*5md.dll" $(Build.SourcesDirectory)\torch\lib
          copy "$(CONDA_LIB_PATH)\uv.dll" $(Build.SourcesDirectory)\torch\lib
        displayName: Copy CUDA/cuDNN/libomp/libuv dlls to torch\lib
    # Set MKL, sccache and randomtemp environment variables
    # CUDA_NVCC_EXECUTABLE points at randomtemp.exe while RANDOMTEMP_EXECUTABLE
    # points at tmp_bin\nvcc.exe, which prepare-build-template.yml creates as a
    # copy of sccache.exe. NOTE(review): presumably randomtemp wraps that
    # executable - confirm against the randomtemp documentation.
    - powershell: |
        Write-Host "##vso[task.setvariable variable=CMAKE_INCLUDE_PATH;]$(Build.SourcesDirectory)\mkl\include"
        Write-Host "##vso[task.setvariable variable=CMAKE_LIBRARY_PATH;]$(Build.SourcesDirectory)\mkl\lib;$env:CMAKE_LIBRARY_PATH"
        Write-Host "##vso[task.setvariable variable=ADDITIONAL_PATH;]$(Build.SourcesDirectory)\tmp_bin"
        Write-Host "##vso[task.setvariable variable=SCCACHE_IDLE_TIMEOUT;]1500"
        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\nvcc.exe"
        Write-Host "##vso[task.setvariable variable=CUDA_NVCC_EXECUTABLE;]$(Build.SourcesDirectory)\tmp_bin\randomtemp.exe"
        Write-Host "##vso[task.setvariable variable=RANDOMTEMP_BASEDIR;]$(Build.SourcesDirectory)\tmp_bin"
      displayName: Set MKL, sccache and randomtemp environment variables
    # View current environment variables
    - script:
        set
      displayName: Show environment variables
--- a/.azure_pipelines/job_templates/wheel-wait-job-template.yml
+++ b/.azure_pipelines/job_templates/wheel-wait-job-template.yml
@ -0,0 +1,14 @@
 # Main logic to initiate the wait for a PR artifact to be ready.
 #
 # Single step: an InvokeRESTAPI@1 task that POSTs to `api/JobStatus` on the
 # `circleciconn` service connection and, because waitForCompletion is true,
 # waits for completion (bounded by timeoutInMinutes: 60).
 steps:
 - task: InvokeRESTAPI@1
  displayName: 'Wait for job success and wheel ready'
  timeoutInMinutes: 60
  inputs:
    connectionType: 'connectedServiceName'
    serviceConnection: circleciconn
    method: 'POST'
    # The request carries everything in headers: the branch/job to check
    # ($(TARGET_BRANCH_TO_CHECK_PR), $(TARGET_CIRCLECI_PR)) plus the Azure
    # DevOps plan/job/timeline/task identifiers and access token.
    # NOTE(review): presumably the remote service uses those identifiers to
    # report completion back to this task; confirm against the service.
    headers: '{"Content-Type":"application/json", "BranchName":"$(TARGET_BRANCH_TO_CHECK_PR)", "JobName":"$(TARGET_CIRCLECI_PR)", "PlanUrl":"$(System.CollectionUri)", "ProjectId":"$(System.TeamProjectId)", "HubName":"$(System.HostType)", "PlanId":"$(System.PlanId)", "JobId":"$(System.JobId)", "TimelineId":"$(System.TimelineId)", "TaskInstanceId":"$(System.TaskInstanceId)", "AuthToken":"$(System.AccessToken)"}'
    body: ''
    urlSuffix: 'api/JobStatus'
    waitForCompletion: true
--- a/.azure_pipelines/job_templates/wheel-wait-template.yml
+++ b/.azure_pipelines/job_templates/wheel-wait-template.yml
@ -0,0 +1,49 @@
 # Initiate 5 agentless (pool: server) waiting jobs to check on the
 # status of PR artifact builds, for a maximum wait time of
 # 5 * 60 min = 300 minutes. These jobs will pass immediately
 # once the targeted CircleCI build is ready.
 #
 # The jobs form a chain: checkjob2..checkjob5 each depend on the previous
 # job, and every job sets continueOnError: true so that a failure or timeout
 # in one job does not stop the next job in the chain from running.
 # Each job runs the single step defined in wheel-wait-job-template.yml.
 jobs:
 # First wait window (no dependency).
 - job: checkjob1
  pool: server
  timeoutInMinutes: 60
  continueOnError: true
  steps:
  - template: wheel-wait-job-template.yml
 # Second wait window, runs after checkjob1.
 - job: checkjob2
  pool: server
  timeoutInMinutes: 60
  dependsOn: checkjob1
  continueOnError: true
  steps:
  - template: wheel-wait-job-template.yml
 # Third wait window, runs after checkjob2.
 - job: checkjob3
  pool: server
  timeoutInMinutes: 60
  dependsOn: checkjob2
  continueOnError: true
  steps:
  - template: wheel-wait-job-template.yml
 # Fourth wait window, runs after checkjob3.
 - job: checkjob4
  pool: server
  timeoutInMinutes: 60
  dependsOn: checkjob3
  continueOnError: true
  steps:
  - template: wheel-wait-job-template.yml
 # Fifth and last wait window, runs after checkjob4.
 - job: checkjob5
  pool: server
  timeoutInMinutes: 60
  dependsOn: checkjob4
  continueOnError: true
  steps:
  - template: wheel-wait-job-template.yml
--- a/.azure_pipelines/nightly-pytorch-tests-pipeline.yml
+++ b/.azure_pipelines/nightly-pytorch-tests-pipeline.yml
@ -0,0 +1,50 @@
 # Nightly PyTorch Tests Pipeline on Azure DevOps
 #
 # This pipeline runs custom PyTorch unit-tests on nightly
 # PyTorch wheels.
 #
 # One stage with four jobs: Linux CPU, Linux GPU, Windows CPU, Windows GPU.
 # Every value of the form $(NAME) is a pipeline variable that is not defined
 # in this file. NOTE(review): presumably set in the pipeline's UI/variable
 # group; confirm where they live.
 stages:
 - stage: 'NightlyCustomTests'
  displayName: 'Run custom unit tests on PyTorch wheels'
  jobs:
  # Linux job #1 (named as the CPU docker configuration).
  - template: job_templates/pytorch-template-unix.yml
    parameters:
      name: ubuntu_1804_CPU_docker
      pool: $(BUILD_POOL_LIN_1)
      customMatrixes:
        Nightly_Custom_Tests:
          _DOCKER_IMAGE: $(DOCKER_IMAGE_LIN_1)
          _PYTHON_VERSION: $(PYTHON_VERSION_LIN_1)
          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_LIN_1)
          _RUN_TESTS: $(RUN_TESTS_LIN)
  # Linux job #2 (named as the GPU docker configuration).
  - template: job_templates/pytorch-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: $(BUILD_POOL_LIN_2)
      customMatrixes:
        Nightly_Custom_Tests:
          _DOCKER_IMAGE: $(DOCKER_IMAGE_LIN_2)
          _PYTHON_VERSION: $(PYTHON_VERSION_LIN_2)
          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_LIN_2)
          _RUN_TESTS: $(RUN_TESTS_LIN)
  # Windows job #1 (CPU); note the Windows matrices pass no _DOCKER_IMAGE.
  - template: job_templates/pytorch-template-win.yml
    parameters:
      name: windows_2019_CPU
      pool: $(BUILD_POOL_WIN_1)
      customMatrixes:
        Nightly_Custom_Tests:
          _PYTHON_VERSION: $(PYTHON_VERSION_WIN_1)
          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_1)
          _RUN_TESTS: $(RUN_TESTS_WIN)
  # Windows job #2 (GPU).
  - template: job_templates/pytorch-template-win.yml
    parameters:
      name: windows_2019_GPU
      pool: $(BUILD_POOL_WIN_2)
      customMatrixes:
        Nightly_Custom_Tests:
          _PYTHON_VERSION: $(PYTHON_VERSION_WIN_2)
          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_WIN_2)
          _RUN_TESTS: $(RUN_TESTS_WIN)
--- a/.azure_pipelines/pytorch-tests-pipeline.yml
+++ b/.azure_pipelines/pytorch-tests-pipeline.yml
@ -0,0 +1,30 @@
 # PR PyTorch Tests Pipeline on Azure DevOps
 #
 # This pipeline:
 #   1) ensures that CircleCI builds for a given PR
 #      have finished, and that its artifacts are
 #      ready for download
 #   2) runs custom PyTorch unit-tests on PyTorch
 #      wheels generated during PR builds.
 stages:
 # Stage 1: the chained wait jobs from job_templates/wheel-wait-template.yml.
 - stage: 'EnsureArtifactsReady'
  displayName: 'Ensure PyTorch PR Artifacts are ready'
  jobs:
  - template: job_templates/wheel-wait-template.yml
 # Stage 2: a single Linux job. No dependsOn is declared here, so ordering
 # relative to stage 1 relies on the pipeline's default stage ordering.
 # All $(NAME) values are pipeline variables not defined in this file.
 - stage: 'PRCustomTests'
  displayName: 'Run custom unit tests on PyTorch wheels'
  jobs:
  - template: job_templates/pytorch-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: $(BUILD_POOL_PR)
      customMatrixes:
        PR_Custom_Tests:
          _PYTHON_VERSION: $(PYTHON_VERSION_PR)
          _CUDA_BUILD_VERSION: $(CUDA_BUILD_VERSION_PR)
          _TARGET_CIRCLECI_BUILD: $(TARGET_CIRCLECI_PR)
          _TARGET_BRANCH_TO_CHECK: $(TARGET_BRANCH_TO_CHECK_PR)
          _DOCKER_IMAGE: $(DOCKER_IMAGE_PR)
          _RUN_TESTS: $(RUN_TESTS_PR)
--- a/.azure_pipelines/verify-pipeline.yml
+++ b/.azure_pipelines/verify-pipeline.yml
@ -0,0 +1,224 @@
 # PyTorch Official Builds Pipeline on Azure DevOps
 #
 # This pipeline:
 #   1) builds PyTorch on all available configurations
 #   2) verifies PyTorch artifacts by installing them in a clean environment
 #      and checking torch.__version__
 #   3) publishes official PyTorch artifacts to Azure DevOps Artifacts for consumption
 stages:
 # Build stage: invokes the unix and win build-verify-publish templates with
 # build_stage: True and is_official_build: True, once per platform/accelerator
 # combination. Each customMatrixes entry names one configuration; the Linux
 # entries also name the container image to build in, and the GPU entries
 # carry a CUDA_VERSION value.
 - stage: 'Build'
  displayName: 'Build PyTorch'
  jobs:
  # Linux CPU builds (Python 3.8 and 3.7 configurations).
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_CPU_docker
      pool: 'PyTorch-Linux-CPU'
      container_endpoint: pytorchms.azurecr.io
      build_stage: True
      is_official_build: True
      os: ubuntu
      cuda: cpu
      customMatrixes:
        Py_38:
          configuration: ubuntu_1804_py_38_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
        Py_37:
          configuration: ubuntu_1804_py_37_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
  # Linux GPU builds (three Python/CUDA/cuDNN configurations).
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: 'PyTorch-Linux-GPU'
      container_endpoint: pytorchms.azurecr.io
      build_stage: True
      is_official_build: True
      os: ubuntu
      cuda: gpu
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_810:
          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_765:
          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
          CUDA_VERSION: 101
  # Windows CPU builds (no container image is passed for Windows jobs).
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_CPU
      pool: 'PyTorch-Win-CPU'
      build_stage: True
      is_official_build: True
      os: windows
      cuda: cpu
      customMatrixes:
        Py_38:
          configuration: windows_2019_py_38_cpu
        Py_37:
          configuration: windows_2019_py_37_cpu
  # Windows GPU builds.
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_GPU
      pool: 'PyTorch-Win-GPU'
      build_stage: True
      is_official_build: True
      os: windows
      cuda: gpu
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: windows_2019_py_39_cuda_112_cudnn_810
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_765:
          configuration: windows_2019_py_38_cuda_102_cudnn_765
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_764:
          configuration: windows_2019_py_37_cuda_101_cudnn_764
          CUDA_VERSION: 101
 # Verify stage: runs only after the Build stage succeeded (dependsOn: Build,
 # condition: succeeded()). It invokes the same templates and matrices as the
 # Build stage, but with verify_stage: True and without the os/cuda parameters.
 - stage: 'Verify'
  displayName: 'Verify PyTorch wheels'
  dependsOn: Build
  condition: succeeded()
  jobs:
  # Linux CPU verification.
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_CPU_docker
      pool: 'PyTorch-Linux-CPU'
      container_endpoint: pytorchms.azurecr.io
      verify_stage: True
      is_official_build: True
      customMatrixes:
        Py_38:
          configuration: ubuntu_1804_py_38_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
        Py_37:
          configuration: ubuntu_1804_py_37_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
  # Linux GPU verification.
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: 'PyTorch-Linux-GPU'
      container_endpoint: pytorchms.azurecr.io
      verify_stage: True
      is_official_build: True
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_810:
          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_765:
          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
          CUDA_VERSION: 101
  # Windows CPU verification.
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_CPU
      pool: 'PyTorch-Win-CPU'
      verify_stage: True
      is_official_build: True
      customMatrixes:
        Py_38:
          configuration: windows_2019_py_38_cpu
        Py_37:
          configuration: windows_2019_py_37_cpu
  # Windows GPU verification.
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_GPU
      pool: 'PyTorch-Win-GPU'
      verify_stage: True
      is_official_build: True
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: windows_2019_py_39_cuda_112_cudnn_810
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_765:
          configuration: windows_2019_py_38_cuda_102_cudnn_765
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_764:
          configuration: windows_2019_py_37_cuda_101_cudnn_764
          CUDA_VERSION: 101
 # Publish stage: runs only after the Verify stage succeeded (dependsOn:
 # Verify, condition: succeeded()). It invokes the same templates and matrices
 # as the earlier stages, this time with publish_stage: True.
 - stage: 'Publish'
  displayName: 'Publish PyTorch wheels'
  dependsOn: Verify
  condition: succeeded()
  jobs:
  # Linux CPU publishing.
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_CPU_docker
      pool: 'PyTorch-Linux-CPU'
      container_endpoint: pytorchms.azurecr.io
      publish_stage: True
      is_official_build: True
      customMatrixes:
        Py_38:
          configuration: ubuntu_1804_py_38_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cpu_dev
        Py_37:
          configuration: ubuntu_1804_py_37_cpu
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cpu_dev
  # Linux GPU publishing.
  - template: job_templates/build-verify-publish-template-unix.yml
    parameters:
      name: ubuntu_1804_GPU_docker
      pool: 'PyTorch-Linux-GPU'
      container_endpoint: pytorchms.azurecr.io
      publish_stage: True
      is_official_build: True
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: ubuntu_1804_py_39_cuda_112_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_39_cuda_112_cudnn_8_dev
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_810:
          configuration: ubuntu_1804_py_38_cuda_102_cudnn_810
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_38_cuda_102_cudnn_8_dev
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_765:
          configuration: ubuntu_1804_py_37_cuda_101_cudnn_765
          container_image: pytorchms.azurecr.io/ubuntu_1804_py_37_cuda_101_cudnn_7_dev
          CUDA_VERSION: 101
  # Windows CPU publishing.
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_CPU
      pool: 'PyTorch-Win-CPU'
      publish_stage: True
      is_official_build: True
      customMatrixes:
        Py_38:
          configuration: windows_2019_py_38_cpu
        Py_37:
          configuration: windows_2019_py_37_cpu
  # Windows GPU publishing.
  - template: job_templates/build-verify-publish-template-win.yml
    parameters:
      name: windows_2019_GPU
      pool: 'PyTorch-Win-GPU'
      publish_stage: True
      is_official_build: True
      customMatrixes:
        Py_39_CUDA_112_cuDNN_810:
          configuration: windows_2019_py_39_cuda_112_cudnn_810
          CUDA_VERSION: 112
        Py_38_CUDA_102_cuDNN_765:
          configuration: windows_2019_py_38_cuda_102_cudnn_765
          CUDA_VERSION: 102
        Py_37_CUDA_101_cuDNN_764:
          configuration: windows_2019_py_37_cuda_101_cudnn_764
          CUDA_VERSION: 101
--- a/.bazelignore
+++ b/.bazelignore
@ -1,4 +0,0 @@
 # We do not use this library in our Bazel build. It contains an
 # infinitely recursing symlink that makes Bazel very unhappy.
 third_party/ittapi/
 third_party/opentelemetry-cpp
--- a/.bazelrc
+++ b/.bazelrc
@ -1,114 +1,3 @@
-build --cxxopt=--std=c++17
+build --copt=--std=c++14
 build --copt=-I.
 # Bazel does not support including its cc_library targets as system
 # headers. We work around this for generated code
 # (e.g. torch/headeronly/macros/cmake_macros.h) by making the generated directory a
 # system include path.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
 build --copt=-isystem --copt bazel-out/darwin-fastbuild/bin
 build --experimental_ui_max_stdouterr_bytes=2048576
 # Configuration to disable tty features for environments like CI
 build:no-tty --curses no
 build:no-tty --progress_report_interval 10
 build:no-tty --show_progress_rate_limit 10
 # Build with GPU support by default.
 build --define=cuda=true
 # rules_cuda configuration
 build --@rules_cuda//cuda:enable_cuda
 build --@rules_cuda//cuda:cuda_targets=sm_52
 build --@rules_cuda//cuda:compiler=nvcc
 build --repo_env=CUDA_PATH=/usr/local/cuda
 # Configuration to build without GPU support
 build:cpu-only --define=cuda=false
 # define a separate build folder for faster switching between configs
 build:cpu-only --platform_suffix=-cpu-only
 # See the note on the config-less build for details about why we are
 # doing this. We must also do it for the "-cpu-only" platform suffix.
 build --copt=-isystem --copt=bazel-out/k8-fastbuild-cpu-only/bin
 # rules_cuda configuration
 build:cpu-only --@rules_cuda//cuda:enable_cuda=False
 # Definition of --config=shell
 # interactive shell immediately before execution
 build:shell --run_under="//tools/bazel_tools:shellwrap"
 # Disable all warnings for external repositories. We don't care about
 # their warnings.
 build --per_file_copt=^external/@-w
 # Set additional warnings to error level.
 #
 # Implementation notes:
 #  * we use file extensions to determine if we are using the C++
 #    compiler or the cuda compiler
 #  * we use ^// at the start of the regex to only permit matching
 #    PyTorch files. This excludes external repos.
 #
 # Note that because this is logically a command-line flag, it is
 # considered the final word on what warnings are enabled. This has the
 # unfortunate consequence of preventing us from disabling an error at
 # the target level because those flags will come before these flags in
 # the action invocation. Instead we provide per-file exceptions after
 # this.
 #
 # On the bright side, this means we don't have to more broadly apply
 # the exceptions to an entire target.
 #
 # Looking for CUDA flags? We have a cu_library macro that we can edit
 # directly. Look in //tools/rules:cu.bzl for details. Editing the
 # macro over this has the following advantages:
 #  * making changes does not require discarding the Bazel analysis
 #    cache
 #  * it allows for selective overrides on individual targets since the
 #    macro-level opts will come earlier than target level overrides
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
 # The following warnings come from -Wall. We downgrade them from error
 # to warnings here.
 #
 # We intentionally use #pragma unroll, which is compiler specific.
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=extra
 # The following warnings come from -Wextra. We downgrade them from error
 # to warnings here.
 #
 # unused-parameter has a tremendous amount of violations in the
 # codebase. It will be a lot of work to fix them, just disable it for
 # now.
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-parameter
 # missing-field-initializers has both a large number of violations in
 # the codebase, but it also is used pervasively in the Python C
 # API. There are a couple of catches though:
 # * we use multiple versions of the Python API and hence have
 #   potentially multiple different versions of each relevant
 #   struct. They may have different numbers of fields. It will be
 #   unwieldy to support multiple versions in the same source file.
 # * Python itself for many of these structs recommends only
 #   initializing a subset of the fields. We should respect the API
 #   usage conventions of our dependencies.
 #
 # Hence, we just disable this warning altogether. We may want to clean
 # up some of the clear-cut cases that could be risky, but we still
 # likely want to have this disabled for the most part.
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-missing-field-initializers
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-function
 build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-variable
 build --per_file_copt='//:aten/src/ATen/RegisterCompositeExplicitAutograd\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterCompositeImplicitAutograd\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterMkldnnCPU\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorCPU\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterQuantizedCPU\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterSparseCPU\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterSparseCsrCPU\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorMeta\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterSparseMeta\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterQuantizedMeta\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:aten/src/ATen/RegisterZeroTensor\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:torch/csrc/lazy/generated/RegisterAutogradLazy\.cpp$'@-Wno-error=unused-function
 build --per_file_copt='//:torch/csrc/lazy/generated/RegisterLazy\.cpp$'@-Wno-error=unused-function
--- a/.bazelversion
+++ b/.bazelversion
@ -1 +1 @@
-6.5.0
+3.1.0
--- a/.bc-linter.yml
+++ b/.bc-linter.yml
@ -1,15 +0,0 @@
 # Path filter configuration: one include glob and a list of exclude globs.
 # NOTE(review): in this view `include:` and `exclude:` sit at the same
 # level as the empty `paths:` key; confirm the intended nesting against the
 # consuming tool's schema.
 version: 1
 paths:
 # Only Python sources are included.
 include:
  - "**/*.py"
 # Excluded: dot-prefixed files/directories at any depth, underscore-prefixed
 # directories and modules, test and benchmark trees, and test modules named
 # test_*.py or *_test.py.
 exclude:
  - ".*"
  - ".*/**"
  - "**/.*/**"
  - "**/.*"
  - "**/_*/**"
  - "**/_*.py"
  - "**/test/**"
  - "**/benchmarks/**"
  - "**/test_*.py"
  - "**/*_test.py"
--- a/.ci/aarch64_linux/README.md
+++ b/.ci/aarch64_linux/README.md
@ -1,19 +0,0 @@
 # Aarch64 (ARM/Graviton) Support Scripts
 Scripts for building aarch64 PyTorch PIP Wheels. These scripts build the following wheels:
 * torch
 * torchvision
 * torchaudio
 * torchtext
 * torchdata
 ## Aarch64_ci_build.sh
 This script is designed to support CD operations within PyPi manylinux aarch64 container, and to be executed in the container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels. The script "assumes" the PyTorch repo is located at: ```/pytorch``` and will put the wheels into ```/artifacts```.
 ### Usage
 ```DESIRED_PYTHON=<PythonVersion> aarch64_ci_build.sh```
 __NOTE:__ CI build is currently __EXPERIMENTAL__
 ## Build_aarch64_wheel.py
 This app allows a person to build using AWS EC2 resources and requires AWS-CLI and Boto3 with AWS credentials to support building EC2 instances for the wheel builds. Can be used in a codebuild CD or from a local system.
 ### Usage
 ```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```
--- a/.ci/aarch64_linux/aarch64_ci_build.sh
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@ -1,53 +0,0 @@
 #!/bin/bash
 # Driver for building the aarch64 PyTorch wheel inside the CI container.
 #
 # Inputs (environment):
 #   GPU_ARCH_VERSION  optional; selects TORCH_CUDA_ARCH_LIST when it contains
 #                     12.6 / 12.8 / 12.9 / 13.0.
 #   DESIRED_CUDA      required (the script runs with `set -u`); "cpu" selects
 #                     the CPU-only build, anything else the CUDA build.
 #   PYTORCH_EXTRA_INSTALL_REQUIREMENTS
 #                     optional; when non-empty, USE_NVIDIA_PYPI_LIBS=1 is
 #                     exported instead of bundling CUDA libraries.
 #
 # The script sources aarch64_ci_setup.sh from its own directory, installs the
 # Python build requirements and then runs aarch64_wheel_ci_build.py.
 set -eux -o pipefail

 GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}

 # Set CUDA architecture lists to match x86 build_cuda.sh
 if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
    export TORCH_CUDA_ARCH_LIST="8.0;9.0"
 elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
 elif [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
 elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
 fi

 # Compress the fatbin with -compress-mode=size for CUDA 13
 if [[ "$DESIRED_CUDA" == *"13"* ]]; then
    export TORCH_NVCC_FLAGS="-compress-mode=size"
    # Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801
    export BUILD_BUNDLE_PTXAS=1
 fi

 SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
 # Quoted so a checkout path containing spaces does not split the argument.
 source "$SCRIPTPATH/aarch64_ci_setup.sh"

 ###############################################################################
 # Run aarch64 builder python
 ###############################################################################
 cd /
 # adding safe directory for git as the permissions will be
 # on the mounted pytorch repo
 git config --global --add safe.directory /pytorch
 pip install -r /pytorch/requirements.txt
 pip install auditwheel==6.2.0 wheel
 if [ "$DESIRED_CUDA" = "cpu" ]; then
    echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
 else
    echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
    export USE_SYSTEM_NCCL=1

    # Check if we should use NVIDIA libs from PyPI (similar to x86 build_cuda.sh logic)
    # The ":-" default is required: under `set -u` a plain expansion of an
    # unset variable aborts the script before the -z test can treat it as empty.
    if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
        echo "Bundling CUDA libraries with wheel for aarch64."
    else
        echo "Using nvidia libs from pypi for aarch64."
        echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS"
        export USE_NVIDIA_PYPI_LIBS=1
    fi

    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
 fi
--- a/.ci/aarch64_linux/aarch64_ci_setup.sh
+++ b/.ci/aarch64_linux/aarch64_ci_setup.sh
@ -1,21 +0,0 @@
 #!/bin/bash
 set -eux -o pipefail
 # This script is used to prepare the Docker container for the
 # aarch64_wheel_ci_build.py python script by creating symlinks from the
 # desired Python's bin directory into /usr/local/bin/.
 # Default NumPy pin; overridden below for Python 3.13 / 3.13t.
 NUMPY_VERSION=2.0.2
 if [[ "$DESIRED_PYTHON"  == "3.13" || "$DESIRED_PYTHON" == "3.13t" ]]; then
    NUMPY_VERSION=2.1.2
 fi
 # Directory derived from $0. NOTE(review): when this file is sourced, $0 is
 # the calling script's path, so this resolves to the caller's directory.
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 # NOTE(review): presumably this sourced script defines DESIRED_PYTHON_BIN_DIR
 # (used in the loop below); confirm in manywheel/set_desired_python.sh.
 source $SCRIPTPATH/../manywheel/set_desired_python.sh
 pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2
 # Expose the selected interpreter and the build tools under /usr/local/bin;
 # -f replaces any link that already exists there.
 for tool in python python3 pip pip3 ninja scons patchelf; do
    ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;
 done
 python --version
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@ -1,333 +0,0 @@
 #!/usr/bin/env python3
 # encoding: UTF-8
 import os
 import shutil
 from subprocess import check_call, check_output
 def list_dir(path: str) -> list[str]:
    """
    List the entries of a directory by running ``ls -1``.

    Args:
        path: Directory to list.

    Returns:
        The entry names in the order ``ls`` prints them, one per element.
        The newline that terminates the ``ls`` output is not turned into a
        trailing empty-string entry, so an empty directory yields ``[]``.

    Raises:
        subprocess.CalledProcessError: if ``ls`` exits with a non-zero status
            (for example when ``path`` does not exist).
    """
    listing = check_output(["ls", "-1", path]).decode()
    # Splitting on "\n" leaves "" after the final newline; drop empty pieces.
    return [entry for entry in listing.split("\n") if entry]
 def replace_tag(filename) -> None:
    """
    Rewrite the first ``Tag:`` line of a wheel metadata file in place.

    On the first line that starts with ``Tag:``, every ``-linux_`` is replaced
    with ``-manylinux_2_28_`` and the old and new lines are printed. Later
    ``Tag:`` lines and all other lines are written back unchanged.
    """
    with open(filename) as src:
        content = src.readlines()

    # Index of the first "Tag:" line, or None when the file has none.
    tag_index = next(
        (idx for idx, text in enumerate(content) if text.startswith("Tag:")),
        None,
    )
    if tag_index is not None:
        original = content[tag_index]
        content[tag_index] = original.replace("-linux_", "-manylinux_2_28_")
        print(f"Updated tag from {original} to {content[tag_index]}")

    with open(filename, "w") as dst:
        dst.writelines(content)
 def patch_library_rpath(
    folder: str,
    lib_name: str,
    use_nvidia_pypi_libs: bool = False,
    desired_cuda: str = "",
 ) -> None:
    """
    Set the RPATH of ``{folder}/tmp/torch/lib/{lib_name}`` with patchelf.

    Without ``use_nvidia_pypi_libs`` the RPATH is just ``$ORIGIN``. With it,
    the RPATH lists the ``nvidia/*/lib`` directories two levels above the
    library, followed by ``$ORIGIN``; when ``desired_cuda`` contains ``"130"``
    the single ``nvidia/cu13/lib`` directory replaces the per-component ones.
    Nothing happens if the library file does not exist.
    """
    # Directories present in the RPATH for every PyPI-libs build.
    always_present = [
        "$ORIGIN/../../nvidia/cudnn/lib",
        "$ORIGIN/../../nvidia/nvshmem/lib",
        "$ORIGIN/../../nvidia/nccl/lib",
        "$ORIGIN/../../nvidia/cusparselt/lib",
    ]
    # Used when desired_cuda contains "130".
    cu13_layout = ["$ORIGIN/../../nvidia/cu13/lib"]
    # Used for every other desired_cuda value.
    per_component_layout = [
        "$ORIGIN/../../nvidia/cublas/lib",
        "$ORIGIN/../../nvidia/cuda_cupti/lib",
        "$ORIGIN/../../nvidia/cuda_nvrtc/lib",
        "$ORIGIN/../../nvidia/cuda_runtime/lib",
        "$ORIGIN/../../nvidia/cufft/lib",
        "$ORIGIN/../../nvidia/curand/lib",
        "$ORIGIN/../../nvidia/cusolver/lib",
        "$ORIGIN/../../nvidia/cusparse/lib",
        "$ORIGIN/../../nvidia/nvtx/lib",
        "$ORIGIN/../../nvidia/cufile/lib",
    ]

    if not use_nvidia_pypi_libs:
        # Bundled libraries live next to the torch libs.
        rpath = "$ORIGIN"
    else:
        version_dirs = cu13_layout if "130" in desired_cuda else per_component_layout
        # $ORIGIN goes last so local torch libs are still found.
        rpath = ":".join(always_present + version_dirs + ["$ORIGIN"])

    target = f"{folder}/tmp/torch/lib/{lib_name}"
    if not os.path.exists(target):
        return
    os.system(
        f"cd {folder}/tmp/torch/lib/; "
        f"patchelf --set-rpath '{rpath}' --force-rpath {lib_name}"
    )
 def copy_and_patch_library(
    src_path: str,
    folder: str,
    use_nvidia_pypi_libs: bool = False,
    desired_cuda: str = "",
 ) -> None:
    """
    Copy ``src_path`` into ``{folder}/tmp/torch/lib`` and patch its RPATH.

    A source path that does not exist is skipped silently. The remaining
    arguments are forwarded unchanged to ``patch_library_rpath``.
    """
    if not os.path.exists(src_path):
        return
    copied_name = os.path.basename(src_path)
    destination = f"{folder}/tmp/torch/lib/{copied_name}"
    shutil.copy2(src_path, destination)
    patch_library_rpath(folder, copied_name, use_nvidia_pypi_libs, desired_cuda)
 def package_cuda_wheel(wheel_path, desired_cuda) -> None:
    """
    Repackage a built wheel so that it carries its shared-library dependencies.

    Steps, in order:
      1. Unzip the wheel into ``<wheel dir>/tmp`` and delete the original file.
      2. If the ``USE_NVIDIA_PYPI_LIBS`` environment variable is ``"1"``, copy
         only the non-NVIDIA libraries and patch the RPATH of the torch
         libraries; otherwise copy the common and CUDA-version-specific
         libraries into ``torch/lib``.
      3. Rewrite the platform tag in the ``.dist-info/WHEEL`` file.
      4. Re-pack the wheel with ``wheel pack`` and remove the ``tmp`` folder.

    Args:
        wheel_path: Path of the wheel file to repackage.
        desired_cuda: CUDA version string; checked by substring ("13" / "12")
            and its last character is used as the nvrtc-builtins minor version.

    Raises:
        ValueError: on the bundling path, when ``desired_cuda`` contains
            neither "13" nor "12".
    """
    folder = os.path.dirname(wheel_path)
    os.mkdir(f"{folder}/tmp")
    # NOTE(review): the exit status of the shell commands run via os.system
    # in this function is not checked.
    os.system(f"unzip {wheel_path} -d {folder}/tmp")
    # Delete original wheel since it will be repackaged
    os.system(f"rm {wheel_path}")
    # Check if we should use PyPI NVIDIA libraries or bundle system libraries
    use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
    if use_nvidia_pypi_libs:
        print("Using nvidia libs from pypi - skipping CUDA library bundling")
        # For PyPI approach, we don't bundle CUDA libraries - they come from PyPI packages
        # We only need to bundle non-NVIDIA libraries
        minimal_libs_to_copy = [
            "/lib64/libgomp.so.1",
            "/usr/lib64/libgfortran.so.5",
            "/acl/build/libarm_compute.so",
            "/acl/build/libarm_compute_graph.so",
            "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_lapack_core.so.0",
            "/usr/local/lib/libnvpl_blas_core.so.0",
        ]
        # Copy minimal libraries to unzipped_folder/torch/lib
        for lib_path in minimal_libs_to_copy:
            copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
        # Patch torch libraries used for searching libraries
        torch_libs_to_patch = [
            "libtorch.so",
            "libtorch_cpu.so",
            "libtorch_cuda.so",
            "libtorch_cuda_linalg.so",
            "libtorch_global_deps.so",
            "libtorch_python.so",
            "libtorch_nvshmem.so",
            "libc10.so",
            "libc10_cuda.so",
            "libcaffe2_nvrtc.so",
            "libshm.so",
        ]
        for lib_name in torch_libs_to_patch:
            patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
    else:
        print("Bundling CUDA libraries with wheel")
        # Original logic for bundling system CUDA libraries
        # Common libraries for all CUDA versions
        common_libs = [
            # Non-NVIDIA system libraries
            "/lib64/libgomp.so.1",
            "/usr/lib64/libgfortran.so.5",
            "/acl/build/libarm_compute.so",
            "/acl/build/libarm_compute_graph.so",
            # Common CUDA libraries (same for all versions)
            "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_lapack_core.so.0",
            "/usr/local/lib/libnvpl_blas_core.so.0",
            "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
            "/usr/local/cuda/lib64/libcudnn.so.9",
            "/usr/local/cuda/lib64/libcusparseLt.so.0",
            "/usr/local/cuda/lib64/libcurand.so.10",
            "/usr/local/cuda/lib64/libnccl.so.2",
            "/usr/local/cuda/lib64/libnvshmem_host.so.3",
            "/usr/local/cuda/lib64/libcudnn_adv.so.9",
            "/usr/local/cuda/lib64/libcudnn_cnn.so.9",
            "/usr/local/cuda/lib64/libcudnn_graph.so.9",
            "/usr/local/cuda/lib64/libcudnn_ops.so.9",
            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
            "/usr/local/cuda/lib64/libcufile.so.0",
            "/usr/local/cuda/lib64/libcufile_rdma.so.1",
            "/usr/local/cuda/lib64/libcusparse.so.12",
        ]
        # CUDA version-specific libraries
        # NOTE(review): these are substring checks, and "13" is tested before
        # "12", so a value containing both substrings takes the CUDA 13 branch.
        if "13" in desired_cuda:
            # Last character of desired_cuda is used as the minor version.
            minor_version = desired_cuda[-1]
            version_specific_libs = [
                "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13",
                "/usr/local/cuda/lib64/libcublas.so.13",
                "/usr/local/cuda/lib64/libcublasLt.so.13",
                "/usr/local/cuda/lib64/libcudart.so.13",
                "/usr/local/cuda/lib64/libcufft.so.12",
                "/usr/local/cuda/lib64/libcusolver.so.12",
                "/usr/local/cuda/lib64/libnvJitLink.so.13",
                "/usr/local/cuda/lib64/libnvrtc.so.13",
                f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}",
            ]
        elif "12" in desired_cuda:
            # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9")
            minor_version = desired_cuda[-1]
            version_specific_libs = [
                "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
                "/usr/local/cuda/lib64/libcublas.so.12",
                "/usr/local/cuda/lib64/libcublasLt.so.12",
                "/usr/local/cuda/lib64/libcudart.so.12",
                "/usr/local/cuda/lib64/libcufft.so.11",
                "/usr/local/cuda/lib64/libcusolver.so.11",
                "/usr/local/cuda/lib64/libnvJitLink.so.12",
                "/usr/local/cuda/lib64/libnvrtc.so.12",
                f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}",
            ]
        else:
            raise ValueError(f"Unsupported CUDA version: {desired_cuda}.")
        # Combine all libraries
        libs_to_copy = common_libs + version_specific_libs
        # Copy libraries to unzipped_folder/torch/lib
        # (copy_and_patch_library skips any path that does not exist)
        for lib_path in libs_to_copy:
            copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
    # Make sure the wheel is tagged with manylinux_2_28
    # Only the first *.dist-info directory found is processed.
    for f in os.scandir(f"{folder}/tmp/"):
        if f.is_dir() and f.name.endswith(".dist-info"):
            replace_tag(f"{f.path}/WHEEL")
            break
    # Re-create the wheel next to where the original was, then clean up.
    os.system(f"wheel pack {folder}/tmp/ -d {folder}")
    os.system(f"rm -rf {folder}/tmp/")
 def complete_wheel(folder: str) -> str:
    """
    Finish the wheel build and copy the resulting wheel to /artifacts.

    Args:
        folder: checkout directory holding the ``dist`` (and, after a repair,
            ``wheelhouse``) sub-directories.

    Returns:
        File name of the wheel that was copied to /artifacts.
    """
    dist_dir = f"/{folder}/dist"
    wheel_name = list_dir(dist_dir)[0]
    # CUDA wheels are not run through auditwheel: their CUDA dependencies are
    # packaged by a custom script (update_wheel()), so only the file name has to
    # reflect the right Manylinux platform.
    needs_auditwheel = "pytorch" in folder and not enable_cuda
    if not needs_auditwheel:
        final_wheel_name = list_dir(dist_dir)[0]
    else:
        print("Repairing Wheel with AuditWheel")
        check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
        wheelhouse_dir = f"/{folder}/wheelhouse"
        final_wheel_name = list_dir(wheelhouse_dir)[0]
        print(f"Moving {final_wheel_name} wheel to /{folder}/dist")
        os.rename(
            f"{wheelhouse_dir}/{final_wheel_name}",
            f"{dist_dir}/{final_wheel_name}",
        )
    print(f"Copying {final_wheel_name} to artifacts")
    source_path = f"{dist_dir}/{final_wheel_name}"
    shutil.copy2(source_path, f"/artifacts/{final_wheel_name}")
    return final_wheel_name
 def parse_arguments():
    """
    Parse the command-line flags of the AARCH64 wheel build script.

    Returns:
        argparse namespace with ``debug``, ``build_only``, ``test_only``,
        ``enable_mkldnn`` and ``enable_cuda`` attributes.
    """
    import argparse
    cli = argparse.ArgumentParser("AARCH64 wheels python CD")
    # Boolean switches are registered in the same order as they appear in --help.
    for switch in ("--debug", "--build-only"):
        cli.add_argument(switch, action="store_true")
    cli.add_argument("--test-only", type=str)
    for switch in ("--enable-mkldnn", "--enable-cuda"):
        cli.add_argument(switch, action="store_true")
    return cli.parse_args()
 if __name__ == "__main__":
    # Entry point: build the PyTorch wheel from the /pytorch checkout, package
    # the CUDA libraries into it when --enable-cuda is given, then hand the
    # wheel to complete_wheel().
    args = parse_arguments()
    # Module-level on purpose: complete_wheel() reads enable_cuda as a global.
    enable_mkldnn = args.enable_mkldnn
    enable_cuda = args.enable_cuda
    # git prints the branch name followed by a newline. Strip it, otherwise the
    # equality check against "nightly"/"main" below can never succeed.
    branch = (
        check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch")
        .decode()
        .strip()
    )
    print("Building PyTorch wheel")
    build_vars = ""
    # MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
    if enable_cuda:
        build_vars += "MAX_JOBS=5 "
        # Handle PyPI NVIDIA libraries vs bundled libraries
        use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
        if use_nvidia_pypi_libs:
            print("Configuring build for PyPI NVIDIA libraries")
            # Configure for dynamic linking (matching x86 logic)
            build_vars += "ATEN_STATIC_CUDA=0 USE_CUDA_STATIC_LINK=0 USE_CUPTI_SO=1 "
        else:
            print("Configuring build for bundled NVIDIA libraries")
            # Keep existing static linking approach - already configured above
    override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
    desired_cuda = os.getenv("DESIRED_CUDA")
    if override_package_version is not None:
        # An explicit version override wins over anything derived from git.
        version = override_package_version
        build_vars += (
            f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
        )
    elif branch in ["nightly", "main"]:
        # Dev builds: <version.txt minus its last two characters>.dev<commit date>
        build_date = (
            check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
            .decode()
            .replace("-", "")
        )
        version = (
            check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
        )
        if enable_cuda:
            build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 "
        else:
            build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
    elif branch.startswith(("v1.", "v2.")):
        # Release tags look like "v2.0.1" or "v2.0.1-rc3": drop the leading "v"
        # and any "-suffix". split() also handles a tag without a dash, which a
        # find()-based slice would truncate by one character.
        release_version = branch[1:].split("-", maxsplit=1)[0]
        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={release_version} PYTORCH_BUILD_NUMBER=1 "
    if enable_mkldnn:
        print("build pytorch with mkldnn+acl backend")
        build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
        build_vars += "ACL_ROOT_DIR=/acl "
        if enable_cuda:
            build_vars += "BLAS=NVPL "
        else:
            build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/opt/OpenBLAS "
    else:
        print("build pytorch without mkldnn backend")
    # build_vars is a space-separated list of VAR=value assignments that prefix
    # the build command in the shell.
    os.system(f"cd /pytorch; {build_vars} python3 -m build --wheel --no-isolation")
    if enable_cuda:
        print("Updating Cuda Dependency")
        filename = os.listdir("/pytorch/dist/")
        wheel_path = f"/pytorch/dist/{filename[0]}"
        package_cuda_wheel(wheel_path, desired_cuda)
    pytorch_wheel_name = complete_wheel("/pytorch/")
    print(f"Build Complete. Created {pytorch_wheel_name}..")
--- a/.ci/aarch64_linux/build_aarch64_wheel.py
+++ b/.ci/aarch64_linux/build_aarch64_wheel.py
@ -1,999 +0,0 @@
 #!/usr/bin/env python3
 # This script is for building AARCH64 wheels using AWS EC2 instances.
 # To generate binaries for the release follow these steps:
 # 1. Update mappings for each of the Domain Libraries by adding new row to a table like this:
 #         "v1.11.0": ("0.11.0", "rc1"),
 # 2. Run script with following arguments for each of the supported python versions and required tag, for example:
 # build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch v1.11.0-rc3
 import os
 import subprocess
 import sys
 import time
 from typing import Optional, Union
 import boto3
 # AMI image IDs for us-east-1, keyed by OS name; change the following based on
 # your ~/.aws/config. The trailing comment on each entry is the login name to
 # use with that image.
 os_amis = {
    "ubuntu20_04": "ami-052eac90edaa9d08f",  # login_name: ubuntu
    "ubuntu22_04": "ami-0c6c29c5125214c77",  # login_name: ubuntu
    "redhat8": "ami-0698b90665a2ddcf1",  # login_name: ec2-user
 }
 # Default image used by start_instance() when no `ami` is passed.
 ubuntu20_04_ami = os_amis["ubuntu20_04"]
 def compute_keyfile_path(key_name: Optional[str] = None) -> tuple[str, str]:
    """Resolve the SSH key file path and the key name to use.

    The key name falls back to the AWS_KEY_NAME environment variable. The path
    is SSH_KEY_PATH when set, otherwise ~/.ssh/<key_name>.pem.

    Returns:
        (keyfile_path, key_name); both are "" when no key name is known and
        SSH_KEY_PATH is unset.
    """
    resolved_name = key_name if key_name is not None else os.getenv("AWS_KEY_NAME")
    if resolved_name is None:
        # No key name anywhere: only an explicit SSH_KEY_PATH can help.
        return os.getenv("SSH_KEY_PATH", ""), ""
    fallback_path = os.path.join(
        os.path.expanduser("~"), ".ssh", f"{resolved_name}.pem"
    )
    return os.getenv("SSH_KEY_PATH", fallback_path), resolved_name
 # boto3 EC2 resource handle shared by the helper functions below.
 ec2 = boto3.resource("ec2")
 def ec2_get_instances(filter_name, filter_value):
    """Return the EC2 instances whose `filter_name` attribute equals `filter_value`."""
    single_filter = {"Name": filter_name, "Values": [filter_value]}
    return ec2.instances.filter(Filters=[single_filter])
 def ec2_instances_of_type(instance_type="t4g.2xlarge"):
    """Return the EC2 instances of the given instance type."""
    matching_instances = ec2_get_instances("instance-type", instance_type)
    return matching_instances
 def ec2_instances_by_id(instance_id):
    """Return the EC2 instance with the given id, or None when there is no match."""
    matches = list(ec2_get_instances("instance-id", instance_id))
    if not matches:
        return None
    return matches[0]
 def start_instance(
    key_name, ami=ubuntu20_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50
 ):
    """Create one EC2 instance, wait until it is running and return it.

    Args:
        key_name: name of the EC2 key pair to attach.
        ami: image id to boot.
        instance_type: EC2 instance type.
        ebs_size: value passed as the root volume's VolumeSize.

    Returns:
        The instance object looked up again by id after it reached "running".
    """
    root_volume = {
        "DeviceName": "/dev/sda1",
        "Ebs": {
            "DeleteOnTermination": True,
            "VolumeSize": ebs_size,
            "VolumeType": "standard",
        },
    }
    created = ec2.create_instances(
        ImageId=ami,
        InstanceType=instance_type,
        SecurityGroups=["ssh-allworld"],
        KeyName=key_name,
        MinCount=1,
        MaxCount=1,
        BlockDeviceMappings=[root_volume],
    )
    inst = created[0]
    print(f"Create instance {inst.id}")
    inst.wait_until_running()
    # Look the instance up again to read its public DNS name once it is running.
    running_inst = ec2_instances_by_id(inst.id)
    print(f"Instance started at {running_inst.public_dns_name}")
    return running_inst
 class RemoteHost:
    """Runs commands and copies files on a remote machine over ssh/scp.

    Once start_docker() has set `container_id`, run_cmd(), check_output(),
    upload_file() and download_file() act on that container instead of the
    host itself.
    """
    addr: str
    keyfile_path: str
    login_name: str
    container_id: Optional[str] = None
    ami: Optional[str] = None
    def __init__(self, addr: str, keyfile_path: str, login_name: str = "ubuntu"):
        self.addr = addr
        self.keyfile_path = keyfile_path
        self.login_name = login_name
    def _gen_ssh_prefix(self) -> list[str]:
        """Return the argv prefix that runs a command on the host via ssh."""
        target = f"{self.login_name}@{self.addr}"
        return [
            "ssh",
            "-o",
            "StrictHostKeyChecking=no",
            "-i",
            self.keyfile_path,
            target,
            "--",
        ]
    @staticmethod
    def _split_cmd(args: Union[str, list[str]]) -> list[str]:
        """Split a command string on whitespace; lists are returned unchanged."""
        if isinstance(args, str):
            return args.split()
        return args
    def _gen_docker_exec_cmd(self) -> list[str]:
        """Return the argv that starts a bash reading stdin inside the container."""
        assert self.container_id is not None
        return self._gen_ssh_prefix() + [
            "docker",
            "exec",
            "-i",
            self.container_id,
            "bash",
        ]
    def _docker_stdin(self, args: Union[str, list[str]]) -> bytes:
        """Build the script fed to the container's bash: source .bashrc, then the command."""
        script = " ".join(["source .bashrc && "] + self._split_cmd(args))
        return script.encode("utf-8")
    def run_ssh_cmd(self, args: Union[str, list[str]]) -> None:
        """Run a command on the host; raises CalledProcessError on failure."""
        full_cmd = self._gen_ssh_prefix() + self._split_cmd(args)
        subprocess.check_call(full_cmd)
    def check_ssh_output(self, args: Union[str, list[str]]) -> str:
        """Run a command on the host and return its decoded stdout."""
        full_cmd = self._gen_ssh_prefix() + self._split_cmd(args)
        raw_output = subprocess.check_output(full_cmd)
        return raw_output.decode("utf-8")
    def scp_upload_file(self, local_file: str, remote_file: str) -> None:
        """Copy a local file to the host with scp."""
        remote_target = f"{self.login_name}@{self.addr}:{remote_file}"
        scp_cmd = ["scp", "-i", self.keyfile_path, local_file, remote_target]
        subprocess.check_call(scp_cmd)
    def scp_download_file(
        self, remote_file: str, local_file: Optional[str] = None
    ) -> None:
        """Copy a file from the host with scp (into the current directory by default)."""
        destination = "." if local_file is None else local_file
        remote_source = f"{self.login_name}@{self.addr}:{remote_file}"
        scp_cmd = ["scp", "-i", self.keyfile_path, remote_source, destination]
        subprocess.check_call(scp_cmd)
    def start_docker(self, image="quay.io/pypa/manylinux2014_aarch64:latest") -> None:
        """Install and start docker on the host, then launch a detached container."""
        setup_cmds = (
            "sudo apt-get install -y docker.io",
            f"sudo usermod -a -G docker {self.login_name}",
            "sudo service docker start",
            f"docker pull {image}",
        )
        for setup_cmd in setup_cmds:
            self.run_ssh_cmd(setup_cmd)
        # `docker run -d` prints the id of the new container.
        run_output = self.check_ssh_output(f"docker run -t -d -w /root {image}")
        self.container_id = run_output.strip()
    def using_docker(self) -> bool:
        """True once a container has been started for this host."""
        return self.container_id is not None
    def run_cmd(self, args: Union[str, list[str]]) -> None:
        """Run a command in the container if there is one, otherwise on the host."""
        if not self.using_docker():
            return self.run_ssh_cmd(args)
        docker_cmd = self._gen_docker_exec_cmd()
        proc = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE)
        proc.communicate(input=self._docker_stdin(args))
        exit_code = proc.wait()
        if exit_code != 0:
            raise subprocess.CalledProcessError(exit_code, docker_cmd)
    def check_output(self, args: Union[str, list[str]]) -> str:
        """Like run_cmd(), but capture and return the command's decoded stdout."""
        if not self.using_docker():
            return self.check_ssh_output(args)
        docker_cmd = self._gen_docker_exec_cmd()
        proc = subprocess.Popen(
            docker_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
        (out, err) = proc.communicate(input=self._docker_stdin(args))
        exit_code = proc.wait()
        if exit_code != 0:
            raise subprocess.CalledProcessError(
                exit_code, docker_cmd, output=out, stderr=err
            )
        return out.decode("utf-8")
    def upload_file(self, local_file: str, remote_file: str) -> None:
        """Upload a file to the host, or into /root of the container via /tmp on the host."""
        if not self.using_docker():
            return self.scp_upload_file(local_file, remote_file)
        staging_path = os.path.join("/tmp", os.path.basename(local_file))
        self.scp_upload_file(local_file, staging_path)
        container_target = f"{self.container_id}:/root/{remote_file}"
        self.run_ssh_cmd(["docker", "cp", staging_path, container_target])
        self.run_ssh_cmd(["rm", staging_path])
    def download_file(self, remote_file: str, local_file: Optional[str] = None) -> None:
        """Download a file from the host, or from /root of the container via /tmp on the host."""
        if not self.using_docker():
            return self.scp_download_file(remote_file, local_file)
        staging_path = os.path.join("/tmp", os.path.basename(remote_file))
        container_source = f"{self.container_id}:/root/{remote_file}"
        self.run_ssh_cmd(["docker", "cp", container_source, staging_path])
        self.scp_download_file(staging_path, local_file)
        self.run_ssh_cmd(["rm", staging_path])
    def download_wheel(
        self, remote_file: str, local_file: Optional[str] = None
    ) -> None:
        """Download a wheel; docker builds get a manylinux2014 local name by default."""
        if self.using_docker() and local_file is None:
            wheel_basename = os.path.basename(remote_file)
            local_file = wheel_basename.replace(
                "-linux_aarch64.whl", "-manylinux2014_aarch64.whl"
            )
        self.download_file(remote_file, local_file)
    def list_dir(self, path: str) -> list[str]:
        """Return the output of `ls -1 path`, split on newlines."""
        listing = self.check_output(["ls", "-1", path])
        return listing.split("\n")
 def wait_for_connection(addr, port, timeout=15, attempt_cnt=5):
    """Block until a TCP connection to (addr, port) can be opened.

    Args:
        addr: host name or IP address to connect to.
        port: TCP port to connect to.
        timeout: per-attempt connect timeout in seconds; also the pause
            between attempts.
        attempt_cnt: maximum number of connection attempts.

    Raises:
        ConnectionRefusedError, TimeoutError or socket.timeout: re-raised from
            the last attempt when every attempt failed.
    """
    import socket
    # socket.timeout only became an alias of TimeoutError in Python 3.10; list
    # it explicitly so connect timeouts are retried on older interpreters too.
    retryable_errors = (ConnectionRefusedError, TimeoutError, socket.timeout)
    for i in range(attempt_cnt):
        try:
            with socket.create_connection((addr, port), timeout=timeout):
                return
        except retryable_errors:  # noqa: PERF203
            if i == attempt_cnt - 1:
                raise
            time.sleep(timeout)
 def update_apt_repo(host: RemoteHost) -> None:
    """Stop the background apt services on the host, then refresh the package index."""
    background_services = ("apt-daily.service", "unattended-upgrades.service")
    time.sleep(5)
    # Ask both services to stop first, then wait for each one to become inactive.
    for service in background_services:
        host.run_cmd(f"sudo systemctl stop {service} || true")
    for service in background_services:
        host.run_cmd(
            f"while systemctl is-active --quiet {service}; do sleep 1; done"
        )
    # The index is refreshed twice, with a short pause in between.
    host.run_cmd("sudo apt-get update")
    time.sleep(3)
    host.run_cmd("sudo apt-get update")
 def install_condaforge(
    host: RemoteHost, suffix: str = "latest/download/Miniforge3-Linux-aarch64.sh"
 ) -> None:
    """Download and run a Miniforge installer on the host and put it on PATH.

    Args:
        host: machine (or container) to install on.
        suffix: path below the miniforge GitHub releases URL of the installer
            script to use.
    """
    print("Install conda-forge")
    installer_name = os.path.basename(suffix)
    host.run_cmd(f"curl -OL https://github.com/conda-forge/miniforge/releases/{suffix}")
    host.run_cmd(f"sh -f {installer_name} -b")
    host.run_cmd(f"rm -f {installer_name}")
    if not host.using_docker():
        # On the host the PATH line is inserted before the
        # "# If not running interactively" line of .bashrc.
        sed_insert_path = [
            "sed",
            "-i",
            "'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH'",
            ".bashrc",
        ]
        host.run_cmd(sed_insert_path)
        return
    # In the container the PATH line is simply appended to .bashrc.
    host.run_cmd("echo 'PATH=$HOME/miniforge3/bin:$PATH'>>.bashrc")
 def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None:
    """Install Miniforge plus the requested Python and basic build packages.

    Args:
        host: machine (or container) to install on.
        python_version: Python version to install with conda; "3.6" selects an
            older Miniforge release.
    """
    if python_version == "3.6":
        # Python-3.6 EOLed and not compatible with conda-4.11
        install_condaforge(
            host, suffix="download/4.10.3-10/Miniforge3-4.10.3-10-Linux-aarch64.sh"
        )
        host.run_cmd(f"conda install -y python={python_version} numpy pyyaml")
    else:
        install_condaforge(
            host, suffix="download/4.11.0-4/Miniforge3-4.11.0-4-Linux-aarch64.sh"
        )
        # Pytorch-1.10 or older are not compatible with setuptools=59.6 or newer
        # NOTE(review): that remark suggests an upper bound, but the spec is a
        # lower bound; the operator is kept as written - confirm the intent.
        # The spec must be quoted: the command is interpreted by a shell, where a
        # bare ">" is an output redirection (to a file named "=59.5.0").
        host.run_cmd(
            f"conda install -y python={python_version} numpy pyyaml 'setuptools>=59.5.0'"
        )
 def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None:
    """Upload embed_library.py to the host and run it on `wheel_name`.

    Args:
        host: machine (or container) holding the wheel.
        use_conda: install patchelf with conda instead of apt-get.
        wheel_name: path of the wheel on the host.
    """
    host.run_cmd("pip3 install auditwheel")
    if use_conda:
        patchelf_install_cmd = "conda install -y patchelf"
    else:
        patchelf_install_cmd = "sudo apt-get install -y patchelf"
    host.run_cmd(patchelf_install_cmd)
    from tempfile import NamedTemporaryFile
    with NamedTemporaryFile() as script_file:
        script_file.write(embed_library_script.encode("utf-8"))
        # Flush so the full script is on disk before it is uploaded.
        script_file.flush()
        host.upload_file(script_file.name, "embed_library.py")
    print("Embedding libgomp into wheel")
    embed_cmd = f"python3 embed_library.py {wheel_name}"
    if host.using_docker():
        # Docker builds also get their WHEEL tag rewritten by the script.
        embed_cmd += " --update-tag"
    host.run_cmd(embed_cmd)
 def checkout_repo(
    host: RemoteHost,
    *,
    branch: str = "main",
    url: str,
    git_clone_flags: str,
    mapping: dict[str, tuple[str, str]],
 ) -> Optional[str]:
    """Clone `url` on the host at the tag matching `branch`, or at `branch` itself.

    Args:
        host: machine (or container) to run git on.
        branch: PyTorch branch or tag being built.
        url: repository to clone.
        git_clone_flags: extra flags appended to the clone command.
        mapping: PyTorch tag prefix -> (library version, rc suffix). The first
            key that `branch` starts with wins; an empty rc suffix means the tag
            is plain ``v<version>``.

    Returns:
        The mapped library version when a mapping entry matched, else None.
    """
    for prefix, (version, rc_suffix) in mapping.items():
        if not branch.startswith(prefix):
            continue
        # Only add the "-<rc>" part when there is one, so an empty suffix does
        # not produce a tag with a trailing dash.
        tag = f"v{version}-{rc_suffix}" if rc_suffix else f"v{version}"
        host.run_cmd(f"git clone {url} -b {tag} {git_clone_flags}")
        return version
    host.run_cmd(f"git clone {url} -b {branch} {git_clone_flags}")
    return None
 def build_torchvision(
    host: RemoteHost,
    *,
    branch: str = "main",
    use_conda: bool = True,
    git_clone_flags: str,
    run_smoke_tests: bool = True,
 ) -> str:
    """Build the TorchVision wheel on the host and download it.

    Args:
        host: machine (or container) to build on.
        branch: PyTorch branch or tag the build corresponds to.
        use_conda: take libpng/jpeg and patchelf from conda.
        git_clone_flags: extra flags for the git clone.
        run_smoke_tests: install the wheel and run vision/test/smoke_test.py.

    Returns:
        File name of the built wheel.
    """
    print("Checking out TorchVision repo")
    # PyTorch tag prefix -> (torchvision version, rc suffix)
    vision_tags = {
        "v1.7.1": ("0.8.2", "rc2"),
        "v1.8.0": ("0.9.0", "rc3"),
        "v1.8.1": ("0.9.1", "rc1"),
        "v1.9.0": ("0.10.0", "rc1"),
        "v1.10.0": ("0.11.1", "rc1"),
        "v1.10.1": ("0.11.2", "rc1"),
        "v1.10.2": ("0.11.3", "rc1"),
        "v1.11.0": ("0.12.0", "rc1"),
        "v1.12.0": ("0.13.0", "rc4"),
        "v1.12.1": ("0.13.1", "rc6"),
        "v1.13.0": ("0.14.0", "rc4"),
        "v1.13.1": ("0.14.1", "rc2"),
        "v2.0.0": ("0.15.1", "rc2"),
        "v2.0.1": ("0.15.2", "rc2"),
    }
    build_version = checkout_repo(
        host,
        branch=branch,
        url="https://github.com/pytorch/vision",
        git_clone_flags=git_clone_flags,
        mapping=vision_tags,
    )
    print("Building TorchVision wheel")
    # Please note libpng and jpeg are required to build image.so extension
    if use_conda:
        host.run_cmd("conda install -y libpng jpeg")
        # Remove .so files to force static linking
        host.run_cmd(
            "rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so"
        )
        # And patch setup.py to include libz dependency for libpng
        patch_setup_cmd = [
            'sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py'
        ]
        host.run_cmd(patch_setup_cmd)
    build_vars = ""
    if branch == "nightly":
        version = host.check_output(
            ["if [ -f vision/version.txt ]; then cat vision/version.txt; fi"]
        ).strip()
        if not version:
            # In older revisions, version was embedded in setup.py
            setup_version_line = host.check_output(
                ["grep", '"version = \'"', "vision/setup.py"]
            ).strip()
            version = setup_version_line.split("'")[1][:-2]
        # The build date is the first word of the latest commit subject.
        last_commit_subject = host.check_output(
            "cd vision && git log --pretty=format:%s -1"
        ).strip()
        build_date = last_commit_subject.split()[0].replace("-", "")
        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
    elif build_version is not None:
        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
    if host.using_docker():
        build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
    host.run_cmd(f"cd vision && {build_vars} python3 -m build --wheel --no-isolation")
    vision_wheel_name = host.list_dir("vision/dist")[0]
    wheel_path = os.path.join("vision", "dist", vision_wheel_name)
    embed_libgomp(host, use_conda, wheel_path)
    print("Copying TorchVision wheel")
    host.download_wheel(wheel_path)
    if run_smoke_tests:
        host.run_cmd(f"pip3 install {wheel_path}")
        host.run_cmd("python3 vision/test/smoke_test.py")
    print("Delete vision checkout")
    host.run_cmd("rm -rf vision")
    return vision_wheel_name
 def build_torchdata(
    host: RemoteHost,
    *,
    branch: str = "main",
    use_conda: bool = True,
    git_clone_flags: str = "",
 ) -> str:
    """Build the TorchData wheel on the host and download it.

    Args:
        host: machine (or container) to build on.
        branch: PyTorch branch or tag the build corresponds to.
        use_conda: install patchelf with conda when embedding libgomp.
        git_clone_flags: extra flags for the git clone.

    Returns:
        File name of the built wheel.
    """
    print("Checking out TorchData repo")
    git_clone_flags += " --recurse-submodules"
    # PyTorch tag prefix -> (torchdata version, rc suffix)
    data_tags = {
        "v1.13.1": ("0.5.1", ""),
        "v2.0.0": ("0.6.0", "rc5"),
        "v2.0.1": ("0.6.1", "rc1"),
    }
    build_version = checkout_repo(
        host,
        branch=branch,
        url="https://github.com/pytorch/data",
        git_clone_flags=git_clone_flags,
        mapping=data_tags,
    )
    print("Building TorchData wheel")
    build_vars = ""
    if branch == "nightly":
        version = host.check_output(
            ["if [ -f data/version.txt ]; then cat data/version.txt; fi"]
        ).strip()
        # The build date is the first word of the latest commit subject.
        last_commit_subject = host.check_output(
            "cd data && git log --pretty=format:%s -1"
        ).strip()
        build_date = last_commit_subject.split()[0].replace("-", "")
        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
    elif build_version is not None:
        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
    if host.using_docker():
        build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
    host.run_cmd(f"cd data && {build_vars} python3 -m build --wheel --no-isolation")
    wheel_name = host.list_dir("data/dist")[0]
    wheel_path = os.path.join("data", "dist", wheel_name)
    embed_libgomp(host, use_conda, wheel_path)
    print("Copying TorchData wheel")
    host.download_wheel(wheel_path)
    return wheel_name
 def build_torchtext(
    host: RemoteHost,
    *,
    branch: str = "main",
    use_conda: bool = True,
    git_clone_flags: str = "",
 ) -> str:
    """Build the TorchText wheel on the host and download it.

    Args:
        host: machine (or container) to build on.
        branch: PyTorch branch or tag the build corresponds to.
        use_conda: install patchelf with conda when embedding libgomp.
        git_clone_flags: extra flags for the git clone.

    Returns:
        File name of the built wheel.
    """
    print("Checking out TorchText repo")
    git_clone_flags += " --recurse-submodules"
    # PyTorch tag prefix -> (torchtext version, rc suffix)
    text_tags = {
        "v1.9.0": ("0.10.0", "rc1"),
        "v1.10.0": ("0.11.0", "rc2"),
        "v1.10.1": ("0.11.1", "rc1"),
        "v1.10.2": ("0.11.2", "rc1"),
        "v1.11.0": ("0.12.0", "rc1"),
        "v1.12.0": ("0.13.0", "rc2"),
        "v1.12.1": ("0.13.1", "rc5"),
        "v1.13.0": ("0.14.0", "rc3"),
        "v1.13.1": ("0.14.1", "rc1"),
        "v2.0.0": ("0.15.1", "rc2"),
        "v2.0.1": ("0.15.2", "rc2"),
    }
    build_version = checkout_repo(
        host,
        branch=branch,
        url="https://github.com/pytorch/text",
        git_clone_flags=git_clone_flags,
        mapping=text_tags,
    )
    print("Building TorchText wheel")
    build_vars = ""
    if branch == "nightly":
        version = host.check_output(
            ["if [ -f text/version.txt ]; then cat text/version.txt; fi"]
        ).strip()
        # The build date is the first word of the latest commit subject.
        last_commit_subject = host.check_output(
            "cd text && git log --pretty=format:%s -1"
        ).strip()
        build_date = last_commit_subject.split()[0].replace("-", "")
        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
    elif build_version is not None:
        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
    if host.using_docker():
        build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
    host.run_cmd(f"cd text && {build_vars} python3 -m build --wheel --no-isolation")
    wheel_name = host.list_dir("text/dist")[0]
    wheel_path = os.path.join("text", "dist", wheel_name)
    embed_libgomp(host, use_conda, wheel_path)
    print("Copying TorchText wheel")
    host.download_wheel(wheel_path)
    return wheel_name
 def build_torchaudio(
    host: RemoteHost,
    *,
    branch: str = "main",
    use_conda: bool = True,
    git_clone_flags: str = "",
 ) -> str:
    """Build the TorchAudio wheel (with its ffmpeg build) on the host and download it.

    Args:
        host: machine (or container) to build on.
        branch: PyTorch branch or tag the build corresponds to.
        use_conda: install patchelf with conda when embedding libgomp.
        git_clone_flags: extra flags for the git clone.

    Returns:
        File name of the built wheel.
    """
    print("Checking out TorchAudio repo")
    git_clone_flags += " --recurse-submodules"
    # PyTorch tag prefix -> (torchaudio version, rc suffix)
    audio_tags = {
        "v1.9.0": ("0.9.0", "rc2"),
        "v1.10.0": ("0.10.0", "rc5"),
        "v1.10.1": ("0.10.1", "rc1"),
        "v1.10.2": ("0.10.2", "rc1"),
        "v1.11.0": ("0.11.0", "rc1"),
        "v1.12.0": ("0.12.0", "rc3"),
        "v1.12.1": ("0.12.1", "rc5"),
        "v1.13.0": ("0.13.0", "rc4"),
        "v1.13.1": ("0.13.1", "rc2"),
        "v2.0.0": ("2.0.1", "rc3"),
        "v2.0.1": ("2.0.2", "rc2"),
    }
    build_version = checkout_repo(
        host,
        branch=branch,
        url="https://github.com/pytorch/audio",
        git_clone_flags=git_clone_flags,
        mapping=audio_tags,
    )
    print("Building TorchAudio wheel")
    build_vars = ""
    if branch == "nightly":
        # The version is read from the "version = '...'" line of setup.py.
        setup_version_line = host.check_output(
            ["grep", '"version = \'"', "audio/setup.py"]
        ).strip()
        version = setup_version_line.split("'")[1][:-2]
        # The build date is the first word of the latest commit subject.
        last_commit_subject = host.check_output(
            "cd audio && git log --pretty=format:%s -1"
        ).strip()
        build_date = last_commit_subject.split()[0].replace("-", "")
        build_vars += f"BUILD_VERSION={version}.dev{build_date}"
    elif build_version is not None:
        build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
    if host.using_docker():
        build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
    # ffmpeg is built first; the wheel build then runs with USE_FFMPEG=1.
    host.run_cmd(
        f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
        && ./packaging/ffmpeg/build.sh \
        && {build_vars} python3 -m build --wheel --no-isolation"
    )
    wheel_name = host.list_dir("audio/dist")[0]
    wheel_path = os.path.join("audio", "dist", wheel_name)
    embed_libgomp(host, use_conda, wheel_path)
    print("Copying TorchAudio wheel")
    host.download_wheel(wheel_path)
    return wheel_name
 def configure_system(
    host: RemoteHost,
    *,
    compiler: str = "gcc-8",
    use_conda: bool = True,
    python_version: str = "3.8",
 ) -> None:
    """Install the build tooling on the host (or in its container).

    Args:
        host: machine (or container) to configure.
        compiler: accepted for interface compatibility; not used here.
        use_conda: install Python through Miniforge instead of apt packages.
        python_version: Python version for the conda install.
    """
    if use_conda:
        install_condaforge_python(host, python_version)
    print("Configuring the system")
    if host.using_docker():
        host.run_cmd("yum install -y sudo")
        host.run_cmd("conda install -y ninja scons")
    else:
        update_apt_repo(host)
        host.run_cmd("sudo apt-get install -y ninja-build g++ git cmake gfortran unzip")
    system_python = not use_conda
    if system_python:
        host.run_cmd(
            "sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip"
        )
    host.run_cmd("pip3 install dataclasses typing-extensions")
    if system_python:
        print("Installing Cython + numpy from PyPy")
        for package in ("Cython", "numpy"):
            host.run_cmd(f"sudo pip3 install {package}")
 def build_domains(
    host: RemoteHost,
    *,
    branch: str = "main",
    use_conda: bool = True,
    git_clone_flags: str = "",
 ) -> tuple[str, str, str, str]:
    """Build the domain libraries in the order vision, audio, data, text.

    Returns:
        (vision, audio, data, text) wheel file names.
    """
    shared_options = {
        "branch": branch,
        "use_conda": use_conda,
        "git_clone_flags": git_clone_flags,
    }
    vision_wheel_name = build_torchvision(host, **shared_options)
    audio_wheel_name = build_torchaudio(host, **shared_options)
    data_wheel_name = build_torchdata(host, **shared_options)
    text_wheel_name = build_torchtext(host, **shared_options)
    return (vision_wheel_name, audio_wheel_name, data_wheel_name, text_wheel_name)
 def start_build(
    host: RemoteHost,
    *,
    branch: str = "main",
    compiler: str = "gcc-8",
    use_conda: bool = True,
    python_version: str = "3.8",
    pytorch_only: bool = False,
    pytorch_build_number: Optional[str] = None,
    shallow_clone: bool = True,
    enable_mkldnn: bool = False,
 ) -> tuple[str, Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Build the PyTorch wheel on the host and, unless `pytorch_only`, the domain wheels.

    Args:
        host: machine (or container) to build on.
        branch: PyTorch branch or tag to check out.
        compiler: forwarded to configure_system().
        use_conda: use conda for Python and tooling; forced on for docker.
        python_version: Python version to build for.
        pytorch_only: skip the domain libraries.
        pytorch_build_number: optional wheel build number.
        shallow_clone: clone with --depth 1 --shallow-submodules.
        enable_mkldnn: build with the MKLDNN+ACL backend; forced off for
            non-docker hosts.

    Returns:
        (pytorch, vision, audio, data, text) wheel file names; the four domain
        entries are None when `pytorch_only` is set.
    """
    git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else ""
    if host.using_docker() and not use_conda:
        print("Auto-selecting conda option for docker images")
        use_conda = True
    if not host.using_docker():
        print("Disable mkldnn for host builds")
        enable_mkldnn = False
    configure_system(
        host, compiler=compiler, use_conda=use_conda, python_version=python_version
    )
    if host.using_docker():
        print("Move libgfortant.a into a standard location")
        # HACK: pypa gfortran.a is compiled without PIC, which leads to the following error
        # libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17'  # noqa: E501, B950
        # Workaround by copying gfortran library from the host
        host.run_ssh_cmd("sudo apt-get install -y gfortran-8")
        host.run_cmd("mkdir -p /usr/lib/gcc/aarch64-linux-gnu/8")
        host.run_ssh_cmd(
            [
                "docker",
                "cp",
                "/usr/lib/gcc/aarch64-linux-gnu/8/libgfortran.a",
                f"{host.container_id}:/opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/",
            ]
        )
    print("Checking out PyTorch repo")
    host.run_cmd(
        f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}"
    )
    host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh")
    print("Building PyTorch wheel")
    build_opts = ""
    if pytorch_build_number is not None:
        build_opts += f" -C--build-option=--build-number={pytorch_build_number}"
    # Breakpad build fails on aarch64
    build_vars = "USE_BREAKPAD=0 "
    if branch == "nightly":
        # The build date is the first word of the latest commit subject.
        build_date = (
            host.check_output("cd pytorch && git log --pretty=format:%s -1")
            .strip()
            .split()[0]
            .replace("-", "")
        )
        version = host.check_output("cat pytorch/version.txt").strip()[:-2]
        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
    if branch.startswith(("v1.", "v2.")):
        # Drop the leading "v" and any "-rcN" suffix. split() also handles a tag
        # without a dash, which a find()-based slice would cut one character
        # short ("v2.0.1" -> "2.0.").
        release_version = branch[1:].split("-", maxsplit=1)[0]
        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={release_version} PYTORCH_BUILD_NUMBER=1"
    if host.using_docker():
        build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
    if enable_mkldnn:
        host.run_cmd("pytorch/.ci/docker/common/install_acl.sh")
        print("build pytorch with mkldnn+acl backend")
        build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON"
        build_vars += " BLAS=OpenBLAS"
        build_vars += " OpenBLAS_HOME=/opt/OpenBLAS"
        build_vars += " ACL_ROOT_DIR=/acl"
        host.run_cmd(
            f"cd $HOME/pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}"
        )
        print("Repair the wheel")
        pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
        ld_library_path = "/acl/build:$HOME/pytorch/build/lib"
        host.run_cmd(
            f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}"
        )
        print("replace the original wheel with the repaired one")
        pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0]
        # The repaired wheel is copied over the original one, keeping the
        # original file name.
        host.run_cmd(
            f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}"
        )
    else:
        print("build pytorch without mkldnn backend")
        host.run_cmd(
            f"cd pytorch && {build_vars} python3 -m build --wheel --no-isolation{build_opts}"
        )
    print("Deleting build folder")
    host.run_cmd("cd pytorch && rm -rf build")
    pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
    embed_libgomp(host, use_conda, os.path.join("pytorch", "dist", pytorch_wheel_name))
    print("Copying the wheel")
    host.download_wheel(os.path.join("pytorch", "dist", pytorch_wheel_name))
    print("Installing PyTorch wheel")
    host.run_cmd(f"pip3 install pytorch/dist/{pytorch_wheel_name}")
    if pytorch_only:
        return (pytorch_wheel_name, None, None, None, None)
    domain_wheels = build_domains(
        host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags
    )
    return (pytorch_wheel_name, *domain_wheels)
 # Source of the helper script that embed_libgomp() uploads to the build host as
 # embed_library.py and runs as `python3 embed_library.py <wheel> [--update-tag]`.
 # For every ELF file under torch/lib that needs libgomp.so.1 it copies the
 # library into torch/lib and patches the dependency name (patchelf is invoked
 # with --page-size 65536). With --update-tag it also rewrites "-linux_" to
 # "-manylinux2014_" in the Tag lines of the WHEEL file.
 # The text below is runtime data: do not reformat it.
 embed_library_script = """
 #!/usr/bin/env python3
 from auditwheel.patcher import Patchelf
 from auditwheel.wheeltools import InWheelCtx
 from auditwheel.elfutils import elf_file_filter
 from auditwheel.repair import copylib
 from auditwheel.lddtree import lddtree
 from subprocess import check_call
 import os
 import shutil
 import sys
 from tempfile import TemporaryDirectory
 def replace_tag(filename):
   with open(filename, 'r') as f:
     lines = f.read().split("\\n")
   for i,line in enumerate(lines):
       if not line.startswith("Tag: "):
           continue
       lines[i] = line.replace("-linux_", "-manylinux2014_")
       print(f'Updated tag from {line} to {lines[i]}')
   with open(filename, 'w') as f:
       f.write("\\n".join(lines))
 class AlignedPatchelf(Patchelf):
    def set_soname(self, file_name: str, new_soname: str) -> None:
        check_call(['patchelf', '--page-size', '65536', '--set-soname', new_soname, file_name])
    def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None:
        check_call(['patchelf', '--page-size', '65536', '--replace-needed', soname, new_soname, file_name])
 def embed_library(whl_path, lib_soname, update_tag=False):
    patcher = AlignedPatchelf()
    out_dir = TemporaryDirectory()
    whl_name = os.path.basename(whl_path)
    tmp_whl_name = os.path.join(out_dir.name, whl_name)
    with InWheelCtx(whl_path) as ctx:
        torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib')
        ctx.out_wheel=tmp_whl_name
        new_lib_path, new_lib_soname = None, None
        for filename, elf in elf_file_filter(ctx.iter_files()):
            if not filename.startswith('torch/lib'):
                continue
            libtree = lddtree(filename)
            if lib_soname not in libtree['needed']:
                continue
            lib_path = libtree['libs'][lib_soname]['path']
            if lib_path is None:
                print(f"Can't embed {lib_soname} as it could not be found")
                break
            if lib_path.startswith(torchlib_path):
                continue
            if new_lib_path is None:
                new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher)
            patcher.replace_needed(filename, lib_soname, new_lib_soname)
            print(f'Replacing {lib_soname} with {new_lib_soname} for {filename}')
        if update_tag:
            # Add manylinux2014 tag
            for filename in ctx.iter_files():
                if os.path.basename(filename) != 'WHEEL':
                    continue
                replace_tag(filename)
    shutil.move(tmp_whl_name, whl_path)
 if __name__ == '__main__':
    embed_library(sys.argv[1], 'libgomp.so.1', len(sys.argv) > 2 and sys.argv[2] == '--update-tag')
 """
 def run_tests(host: RemoteHost, whl: str, branch="main") -> None:
    """Install a wheel on the remote host and run ``test_torch.py`` against it.

    Args:
        host: Remote host on which every command is executed.
        whl: Path of the wheel; it is uploaded to the host and installed with pip.
        branch: PyTorch branch to clone to obtain the test sources.
    """
    print("Configuring the system")
    update_apt_repo(host)
    host.run_cmd("sudo apt-get install -y python3-pip git")
    host.run_cmd("sudo pip3 install Cython")
    host.run_cmd("sudo pip3 install numpy")
    host.upload_file(whl, ".")
    host.run_cmd(f"sudo pip3 install {whl}")
    # Import smoke test. The previous command lacked the closing parenthesis of
    # print(...), so the remote interpreter always failed with a SyntaxError.
    host.run_cmd("python3 -c 'import torch;print(torch.rand((3,3)))'")
    host.run_cmd(f"git clone -b {branch} https://github.com/pytorch/pytorch")
    host.run_cmd("cd pytorch/test; python3 test_torch.py -v")
 def get_instance_name(instance) -> Optional[str]:
    """Return the value of the instance's ``Name`` tag, or ``None`` when it has none."""
    tags = instance.tags
    if tags is None:
        return None
    # First tag whose key is "Name" wins; fall back to None when no tag matches.
    return next((entry["Value"] for entry in tags if entry["Key"] == "Name"), None)
 def list_instances(instance_type: str) -> None:
    """Print a one-line summary of every EC2 instance of ``instance_type``.

    Each line shows the instance id, its ``Name`` tag, public DNS name, state
    name and availability zone.
    """
    print(f"All instances of type {instance_type}")
    for instance in ec2_instances_of_type(instance_type):
        interfaces = instance.network_interfaces
        # The zone is read from the first network interface; None if there is none.
        az = None
        if len(interfaces) > 0:
            az = interfaces[0].subnet.availability_zone
        print(
            f"{instance.id} {get_instance_name(instance)} {instance.public_dns_name} {instance.state['Name']} {az}"
        )
 def terminate_instances(instance_type: str) -> None:
    """Terminate every EC2 instance of ``instance_type`` and wait until all are gone."""
    print(f"Terminating all instances of type {instance_type}")
    # Materialize the listing once so the same instances are terminated and awaited.
    targets = list(ec2_instances_of_type(instance_type))
    # Request all terminations first, then wait, so the shutdowns overlap.
    for target in targets:
        print(f"Terminating {target.id}")
        target.terminate()
    print("Waiting for termination to complete")
    for target in targets:
        target.wait_until_terminated()
 def parse_arguments():
    """Define the command-line interface of this script and parse the arguments.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser("Build and test AARCH64 wheels using EC2")

    def add_switch(option: str) -> None:
        # Boolean switch: False unless present on the command line.
        parser.add_argument(option, action="store_true")

    parser.add_argument("--key-name", type=str)
    add_switch("--debug")
    add_switch("--build-only")
    parser.add_argument("--test-only", type=str)
    # The machine image is chosen either by OS name or by explicit AMI, never both.
    image_group = parser.add_mutually_exclusive_group()
    image_group.add_argument("--os", type=str, choices=list(os_amis.keys()))
    image_group.add_argument("--ami", type=str)
    python_choices = [f"3.{minor}" for minor in range(6, 12)]
    parser.add_argument(
        "--python-version", type=str, choices=python_choices, default=None
    )
    for option in (
        "--alloc-instance",
        "--list-instances",
        "--pytorch-only",
        "--keep-running",
        "--terminate-instances",
    ):
        add_switch(option)
    parser.add_argument("--instance-type", type=str, default="t4g.2xlarge")
    parser.add_argument("--ebs-size", type=int, default=50)
    parser.add_argument("--branch", type=str, default="main")
    add_switch("--use-docker")
    compiler_choices = ["gcc-7", "gcc-8", "gcc-9", "clang"]
    parser.add_argument(
        "--compiler", type=str, choices=compiler_choices, default="gcc-8"
    )
    add_switch("--use-torch-from-pypi")
    parser.add_argument("--pytorch-build-number", type=str, default=None)
    add_switch("--disable-mkldnn")
    return parser.parse_args()
 if __name__ == "__main__":
    # Script entry point: resolve the AMI and SSH key, serve the instance
    # management modes, then start an EC2 instance and build and/or test on it.
    args = parse_arguments()
    # An explicit --ami wins, then the AMI registered for --os, then Ubuntu 20.04.
    ami = (
        args.ami
        if args.ami is not None
        else os_amis[args.os]
        if args.os is not None
        else ubuntu20_04_ami
    )
    keyfile_path, key_name = compute_keyfile_path(args.key_name)
    # Listing and terminating instances need neither a key nor a new instance.
    if args.list_instances:
        list_instances(args.instance_type)
        sys.exit(0)
    if args.terminate_instances:
        terminate_instances(args.instance_type)
        sys.exit(0)
    if len(key_name) == 0:
        raise RuntimeError("""
            Cannot start build without key_name, please specify
            --key-name argument or AWS_KEY_NAME environment variable.""")
    if len(keyfile_path) == 0 or not os.path.exists(keyfile_path):
        raise RuntimeError(f"""
            Cannot find keyfile with name: [{key_name}] in path: [{keyfile_path}], please
            check `~/.ssh/` folder or manually set SSH_KEY_PATH environment variable.""")
    # Starting the instance
    inst = start_instance(
        key_name, ami=ami, instance_type=args.instance_type, ebs_size=args.ebs_size
    )
    # NOTE(review): the tag is built from the raw --key-name/--os options, so it
    # contains "None" when they are omitted; confirm whether key_name was intended.
    instance_name = f"{args.key_name}-{args.os}"
    if args.python_version is not None:
        instance_name += f"-py{args.python_version}"
    inst.create_tags(
        DryRun=False,
        Tags=[
            {
                "Key": "Name",
                "Value": instance_name,
            }
        ],
    )
    addr = inst.public_dns_name
    wait_for_connection(addr, 22)
    host = RemoteHost(addr, keyfile_path)
    host.ami = ami
    if args.use_docker:
        update_apt_repo(host)
        host.start_docker()
    if args.test_only:
        # Honor --branch here as well; previously run_tests always cloned "main"
        # because the branch was not forwarded.
        run_tests(host, args.test_only, branch=args.branch)
        # Exits without terminating the instance.
        sys.exit(0)
    if args.alloc_instance:
        # Allocation mode: optionally install Python, then leave the instance up.
        if args.python_version is None:
            sys.exit(0)
        install_condaforge_python(host, args.python_version)
        sys.exit(0)
    python_version = args.python_version if args.python_version is not None else "3.10"
    if args.use_torch_from_pypi:
        # Use the released torch wheel and build only the domain libraries.
        configure_system(host, compiler=args.compiler, python_version=python_version)
        print("Installing PyTorch wheel")
        host.run_cmd("pip3 install torch")
        build_domains(
            host, branch=args.branch, git_clone_flags=" --depth 1 --shallow-submodules"
        )
    else:
        start_build(
            host,
            branch=args.branch,
            compiler=args.compiler,
            python_version=python_version,
            pytorch_only=args.pytorch_only,
            pytorch_build_number=args.pytorch_build_number,
            enable_mkldnn=not args.disable_mkldnn,
        )
    if not args.keep_running:
        print(f"Waiting for instance {inst.id} to terminate")
        inst.terminate()
        inst.wait_until_terminated()
--- a/.ci/aarch64_linux/embed_library.py
+++ b/.ci/aarch64_linux/embed_library.py
@ -1,87 +0,0 @@
 #!/usr/bin/env python3
 import os
 import shutil
 import sys
 from subprocess import check_call
 from tempfile import TemporaryDirectory
 from auditwheel.elfutils import elf_file_filter
 from auditwheel.lddtree import lddtree
 from auditwheel.patcher import Patchelf
 from auditwheel.repair import copylib
 from auditwheel.wheeltools import InWheelCtx
 def replace_tag(filename):
    """Rewrite the ``Tag:`` lines of a WHEEL metadata file to manylinux2014.

    In every line starting with ``Tag: `` the text ``-linux_`` is replaced by
    ``-manylinux2014_``; all other lines are written back unchanged.

    Args:
        filename: Path of the WHEEL file; it is modified in place.
    """
    with open(filename) as f:
        # Split on real newlines. The previous separator was a literal backslash
        # followed by "n", which never occurs in the file, so the whole content
        # was treated as one line and no "Tag: " line was ever found.
        lines = f.read().split("\n")
    for i, line in enumerate(lines):
        if not line.startswith("Tag: "):
            continue
        lines[i] = line.replace("-linux_", "-manylinux2014_")
        print(f"Updated tag from {line} to {lines[i]}")
    with open(filename, "w") as f:
        f.write("\n".join(lines))
 class AlignedPatchelf(Patchelf):
    """``Patchelf`` whose soname edits run ``patchelf`` with ``--page-size 65536``."""

    def set_soname(self, file_name: str, new_soname: str) -> None:
        """Set the soname of ``file_name`` to ``new_soname``."""
        command = ["patchelf", "--page-size", "65536"]
        command += ["--set-soname", new_soname, file_name]
        check_call(command)

    def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None:
        """Replace the needed-library entry ``soname`` of ``file_name`` with ``new_soname``."""
        command = ["patchelf", "--page-size", "65536"]
        command += ["--replace-needed", soname, new_soname, file_name]
        check_call(command)
 def embed_library(whl_path, lib_soname, update_tag=False):
    """Bundle the shared library ``lib_soname`` into the wheel's ``torch/lib``.

    Every ELF file under ``torch/lib`` that lists ``lib_soname`` as needed, and
    resolves it to a path outside the unpacked wheel's ``torch/lib``, has that
    dependency replaced by the soname returned by auditwheel's ``copylib``. The
    library is copied once, on first use. The repacked wheel then replaces
    ``whl_path``.

    Args:
        whl_path: Path of the wheel; it is overwritten with the repacked wheel.
        lib_soname: Soname of the library to embed, e.g. ``libgomp.so.1``.
        update_tag: When true, also run ``replace_tag`` on every file named
            ``WHEEL`` inside the wheel.
    """
    patcher = AlignedPatchelf()
    whl_name = os.path.basename(whl_path)
    # Use the scratch directory as a context manager so it is removed on every
    # exit path instead of relying on the object's finalizer.
    with TemporaryDirectory() as out_dir:
        tmp_whl_name = os.path.join(out_dir, whl_name)
        with InWheelCtx(whl_path) as ctx:
            torchlib_path = os.path.join(ctx._tmpdir.name, "torch", "lib")
            # The repacked wheel is written to this path when the context exits.
            ctx.out_wheel = tmp_whl_name
            new_lib_path, new_lib_soname = None, None
            for filename, _ in elf_file_filter(ctx.iter_files()):
                if not filename.startswith("torch/lib"):
                    continue
                libtree = lddtree(filename)
                if lib_soname not in libtree["needed"]:
                    continue
                lib_path = libtree["libs"][lib_soname]["path"]
                if lib_path is None:
                    # The library cannot be located; stop patching altogether.
                    print(f"Can't embed {lib_soname} as it could not be found")
                    break
                if lib_path.startswith(torchlib_path):
                    # Already resolved to a library inside the wheel.
                    continue
                if new_lib_path is None:
                    new_lib_soname, new_lib_path = copylib(
                        lib_path, torchlib_path, patcher
                    )
                patcher.replace_needed(filename, lib_soname, new_lib_soname)
                print(f"Replacing {lib_soname} with {new_lib_soname} for {filename}")
            if update_tag:
                # Add manylinux2014 tag
                for filename in ctx.iter_files():
                    if os.path.basename(filename) != "WHEEL":
                        continue
                    replace_tag(filename)
        # Move the result over the original before the scratch directory is removed.
        shutil.move(tmp_whl_name, whl_path)
 if __name__ == "__main__":
    # Usage: embed_library.py WHEEL [--update-tag]
    wheel_path = sys.argv[1]
    wants_tag_update = len(sys.argv) > 2 and sys.argv[2] == "--update-tag"
    embed_library(wheel_path, "libgomp.so.1", wants_tag_update)
--- a/.ci/caffe2/README.md
+++ b/.ci/caffe2/README.md
@ -1,12 +0,0 @@
 # Jenkins
 The scripts in this directory are the entrypoint for testing Caffe2.
 The environment variable `BUILD_ENVIRONMENT` is expected to be set to
 the build environment you intend to test. It is a hint for the build
 and test scripts to configure Caffe2 a certain way and include/exclude
 tests. For Docker images, it equals the name of the image itself. For
 example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
 built on Jenkins and are used in triggered builds already have this
 environment variable set in their manifest. Also see
 `./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
--- a/.ci/caffe2/common.sh
+++ b/.ci/caffe2/common.sh
@ -1,36 +0,0 @@
 # Shared setup for the Caffe2 CI scripts in this directory: resolves the
 # repository paths, picks the Python interpreter, prepares sccache on ROCm
 # builds and creates the report and install directories.
 # -e: abort on the first failing command; -x: trace every command.
 set -ex
 # Directory containing this script, and the repository root two levels above it.
 LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
 TEST_DIR="$ROOT_DIR/test"
 gtest_reports_dir="${TEST_DIR}/test-reports/cpp"
 pytest_reports_dir="${TEST_DIR}/test-reports/python"
 # Figure out which Python to use
 PYTHON="$(which python)"
 # If BUILD_ENVIRONMENT contains "py2"/"py3" (optionally with minor/patch digits),
 # use the matching "python<version>" binary instead of the default one.
 if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
  PYTHON=$(which "python${BASH_REMATCH[1]}")
 fi
 if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
    # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
    unset HIP_PLATFORM
    if which sccache > /dev/null; then
        # Save sccache logs to file
        # Restart the server so the log file and timeout settings below take effect.
        sccache --stop-server || true
        rm -f ~/sccache_error.log || true
        SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
        # Report sccache stats for easier debugging
        sccache --zero-stats
    fi
 fi
 # /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
 # builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
 # that the test code in .ci/test.sh is the same
 INSTALL_PREFIX="/usr/local/caffe2"
 # "|| true" keeps a failed mkdir from aborting the script under "set -e".
 mkdir -p "$gtest_reports_dir" || true
 mkdir -p "$pytest_reports_dir" || true
 mkdir -p "$INSTALL_PREFIX" || true
--- a/.ci/caffe2/test.sh
+++ b/.ci/caffe2/test.sh
@ -1,168 +0,0 @@
 #!/bin/bash
 # Runs the Caffe2 C++ tests and then the Caffe2 Python tests, selecting what to
 # run from BUILD_ENVIRONMENT and SHARD_NUMBER.
 # shellcheck source=./common.sh
 source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
 if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
  pip install click mock tabulate networkx==2.0
  pip -q install "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
 fi
 # Skip tests in environments where they are not built/applicable
 if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
  echo 'Skipping tests'
  exit 0
 fi
 # These additional packages are needed for circleci ROCm builds.
 if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
    # Need networkx 2.0 because bellman_ford was moved in 2.1. Scikit-image by
    # default installs the most recent networkx version, so we install this lower
    # version explicitly before scikit-image pulls it in as a dependency
    pip install networkx==2.0
    # click - onnx
    pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
 fi
 # Find where cpp tests and Caffe2 itself are installed
 if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  # For cmake only build we install everything into /usr/local
  cpp_test_dir="$INSTALL_PREFIX/cpp_test"
  ld_library_path="$INSTALL_PREFIX/lib"
 else
  # For Python builds we install into python
  # cd to /usr first so the python import doesn't get confused by any 'caffe2'
  # directory in cwd
  python_installation="$(dirname $(dirname $(cd /usr && $PYTHON -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')))"
  caffe2_pypath="$python_installation/caffe2"
  cpp_test_dir="$python_installation/torch/test"
  ld_library_path="$python_installation/torch/lib"
 fi
 ################################################################################
 # C++ tests #
 ################################################################################
 # Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
 if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
  echo "Running C++ tests.."
  # Every executable file under the cpp test directory is treated as a test binary.
  for test in $(find "$cpp_test_dir" -executable -type f); do
    case "$test" in
      # skip tests we know are hanging or bad
      */mkl_utils_test|*/aten/integer_divider_test)
        continue
        ;;
      # These run everywhere except on ROCm builds.
      */scalar_tensor_test|*/basic|*/native_test)
        if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
          continue
        else
          LD_LIBRARY_PATH="$ld_library_path" "$test"
        fi
        ;;
      */*_benchmark)
        LD_LIBRARY_PATH="$ld_library_path" "$test" --benchmark_color=false
        ;;
      *)
        # Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While
        # planning to migrate to gtest as the common PyTorch c++ test suite, we
        # currently do NOT use the xml test reporter, because Catch doesn't
        # support multiple reporters
        # c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
        # which means that enabling XML output means you lose useful stdout
        # output for Jenkins.  It's more important to have useful console
        # output than it is to have XML output for Jenkins.
        # Note: in the future, if we want to use xml test reporter once we switch
        # to all gtest, one can simply do:
        LD_LIBRARY_PATH="$ld_library_path" \
            "$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml"
        ;;
    esac
  done
 fi
 ################################################################################
 # Python tests #
 ################################################################################
 # cmake-only builds have no Python package to test.
 if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
  exit 0
 fi
 # If pip is installed as root, we must use sudo.
 # CircleCI docker images could install conda as jenkins user, or use the OS's python package.
 PIP=$(which pip)
 PIP_USER=$(stat --format '%U' $PIP)
 CURRENT_USER=$(id -u -n)
 # MAYBE_SUDO is left unset (expands to nothing) when sudo is not required.
 if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
  MAYBE_SUDO=sudo
 fi
 # Uninstall pre-installed hypothesis and coverage to use an older version as newer
 # versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
 $MAYBE_SUDO pip -q uninstall -y hypothesis
 $MAYBE_SUDO pip -q uninstall -y coverage
 # "pip install hypothesis==3.44.6" from official server is unreliable on
 # CircleCI, so we host a copy on S3 instead
 $MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
 $MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
 $MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
 # Collect additional tests to run (outside caffe2/python)
 EXTRA_TESTS=()
 # CUDA builds always include NCCL support
 if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
  EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
 fi
 # Each element holds "--ignore <path>" as one string; the array is expanded
 # unquoted in the pytest command so every element splits into two arguments.
 rocm_ignore_test=()
 if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
  # Currently these tests are failing on ROCM platform:
  # On ROCm, RCCL (distributed) development isn't complete.
  # https://github.com/ROCmSoftwarePlatform/rccl
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/data_parallel_model_test.py")
  # This test has been flaky in ROCm CI (but note the tests are
  # cpu-only so should be unrelated to ROCm)
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
  # This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/convfusion_op_test.py")
  # This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
 fi
 echo "Running Python tests.."
 # locale setting is required by click package
 # Use the first of these locales that is available on the system.
 for loc in "en_US.utf8" "C.UTF-8"; do
  if locale -a | grep "$loc" >/dev/null 2>&1; then
    export LC_ALL="$loc"
    export LANG="$loc"
    break;
  fi
 done
 # Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
 export DNNL_MAX_CPU_ISA=AVX2
 # Should still run even in the absence of SHARD_NUMBER
 if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
  # TODO(sdym@meta.com) remove this when the linked issue resolved.
  # py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
  pip install py==1.11.0
  pip install pytest-sugar
  # NB: Warnings are disabled because they make it harder to see what
  # the actual erroring test is
  "$PYTHON" \
    -m pytest \
    -x \
    -v \
    --disable-warnings \
    --junit-xml="$pytest_reports_dir/result.xml" \
    --ignore "$caffe2_pypath/python/test/executor_test.py" \
    --ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
    --ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
    --ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
    --ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
    ${rocm_ignore_test[@]} \
    "$caffe2_pypath/python" \
    "${EXTRA_TESTS[@]}"
 fi
--- a/.ci/docker/README.md
+++ b/.ci/docker/README.md
@ -1,139 +0,0 @@
 # Docker images for GitHub CI and CD
 This directory contains everything needed to build the Docker images
 that are used in our CI.
 The Dockerfiles located in subdirectories are parameterized to
 conditionally run build stages depending on build arguments passed to
 `docker build`. This lets us use only a few Dockerfiles for many
 images. The different configurations are identified by a freeform
 string that we call a _build environment_. This string is persisted in
 each image as the `BUILD_ENVIRONMENT` environment variable.
 See `build.sh` for valid build environments (it's the giant switch).
 ## Docker CI builds
 * `build.sh` -- dispatch script to launch all builds
 * `common` -- scripts used to execute individual Docker build stages
 * `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs
 * `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
 * `ubuntu-rocm` -- Dockerfile for Ubuntu image with ROCm support
 * `ubuntu-xpu` -- Dockerfile for Ubuntu image with XPU support
 ### Docker CD builds
 * `conda` - Dockerfile and build.sh to build Docker images used in nightly conda builds
 * `manywheel` - Dockerfile and build.sh to build Docker images used in nightly manywheel builds
 * `libtorch` - Dockerfile and build.sh to build Docker images used in nightly libtorch builds
 ## Usage
 ```bash
 # Build a specific image
 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
 # Set flags (see build.sh) and build image
 sudo bash -c 'TRITON=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
 ```
 ## [Guidance] Adding a New Base Docker Image
 ### Background
 The base Docker images in directory `.ci/docker/` are built by the `docker-builds.yml` workflow. Those images are used throughout the PyTorch CI/CD pipeline. You should only create or modify a base Docker image if you need specific environment changes or dependencies before building PyTorch on CI.
 1. **Automatic Rebuilding**:
   - The Docker image building process is triggered automatically when changes are made to files in the `.ci/docker/*` directory
   - This ensures all images stay up-to-date with the latest dependencies and configurations
 2. **Image Reuse in PyTorch Build Workflows** (example: linux-build):
   - The images generated by `docker-builds.yml` are reused in `_linux-build.yml` through the `calculate-docker-image` step
   - The `_linux-build.yml` workflow:
     - Pulls the Docker image determined by the `calculate-docker-image` step
     - Runs a Docker container with that image
     - Executes `.ci/pytorch/build.sh` inside the container to build PyTorch
 3. **Usage in Test Workflows** (example: linux-test):
   - The same Docker images are also used in `_linux-test.yml` for running tests
   - The `_linux-test.yml` workflow follows a similar pattern:
     - It uses the `calculate-docker-image` step to determine which Docker image to use
     - It pulls the Docker image and runs a container with that image
     - It installs the wheels from the artifacts generated by PyTorch build jobs
     - It executes test scripts (like `.ci/pytorch/test.sh` or `.ci/pytorch/multigpu-test.sh`) inside the container
 ### Understanding File Purposes
 #### `.ci/docker/build.sh` vs `.ci/pytorch/build.sh`
 - **`.ci/docker/build.sh`**:
  - Used for building base Docker images
  - Executed by the `docker-builds.yml` workflow to pre-build Docker images for CI
  - Contains configurations for different Docker build environments
 - **`.ci/pytorch/build.sh`**:
  - Used for building PyTorch inside a Docker container
  - Called by workflows like `_linux-build.yml` after the Docker container is started
  - Builds PyTorch wheels and other artifacts
 #### `.ci/docker/ci_commit_pins/` vs `.github/ci_commit_pins`
 - **`.ci/docker/ci_commit_pins/`**:
  - Used for pinning dependency versions during base Docker image building
  - Ensures consistent environments for building PyTorch
  - Changes here trigger base Docker image rebuilds
 - **`.github/ci_commit_pins`**:
  - Used for pinning dependency versions during PyTorch building and tests
  - Ensures consistent dependencies for PyTorch across different builds
  - Used by build scripts running inside Docker containers
 ### Step-by-Step Guide for Adding a New Base Docker Image
 #### 1. Add Pinned Commits (If Applicable)
 We use pinned commits for build stability. The `nightly.yml` workflow checks and updates pinned commits for certain repository dependencies daily.
 If your new Docker image needs a library installed from a specific pinned commit or built from source:
 1. Add the repository you want to track in `nightly.yml` and `merge-rules.yml`
 2. Add the initial pinned commit in `.ci/docker/ci_commit_pins/`. The text filename should match the one defined in step 1
 #### 2. Configure the Base Docker Image
 1. **Add new Base Docker image configuration** (if applicable):
   Add the configuration in `.ci/docker/build.sh`. For example:
   ```bash
   pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-new1)
     CUDA_VERSION=12.8.1
     ANACONDA_PYTHON_VERSION=3.12
     GCC_VERSION=11
     VISION=yes
     KATEX=yes
     UCX_COMMIT=${_UCX_COMMIT}
     UCC_COMMIT=${_UCC_COMMIT}
     TRITON=yes
     NEW_ARG_1=yes
     ;;
   ```
 2. **Add build arguments to Docker build command**:
   If you're introducing a new argument to the Docker build, make sure to add it in the Docker build step in `.ci/docker/build.sh`:
   ```bash
   docker build \
     ....
     --build-arg "NEW_ARG_1=${NEW_ARG_1}"
   ```
 3. **Update Dockerfile logic**:
   Update the Dockerfile to use the new argument. For example, in `ubuntu/Dockerfile`:
   ```dockerfile
   ARG NEW_ARG_1
   # Set up environment for NEW_ARG_1
   RUN if [ -n "${NEW_ARG_1}" ]; then bash ./do_something.sh; fi
   ```
 4. **Add the Docker configuration** in `.github/workflows/docker-builds.yml`:
   The `docker-builds.yml` workflow pre-builds the Docker images whenever changes occur in the `.ci/docker/` directory. This includes the
   pinned commit updates.
--- a/.ci/docker/almalinux/Dockerfile
+++ b/.ci/docker/almalinux/Dockerfile
@ -1,107 +0,0 @@
 # Multi-stage image based on AlmaLinux 8. The stage named by BASE_TARGET becomes
 # the base of the "final" stage, which adds OpenSSL, patchelf, conda, jni.h and
 # the MNIST data on top of it.
 # These global ARGs are used in the FROM lines below.
 ARG CUDA_VERSION=12.6
 ARG BASE_TARGET=cuda${CUDA_VERSION}
 ARG ROCM_IMAGE=rocm/dev-almalinux-8:6.3-complete
 # base: locale, compiler toolchain, git and cmake shared by most stages.
 FROM amd64/almalinux:8.10-20250519 as base
 ENV LC_ALL en_US.UTF-8
 ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8
 ARG DEVTOOLSET_VERSION=11
 RUN yum -y update
 RUN yum -y install epel-release
 # install glibc-langpack-en to make sure the en_US.UTF-8 locale is available
 RUN yum -y install glibc-langpack-en
 RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
 # Just add everything as a safe.directory for git since these will be used in multiple places with git
 RUN git config --global --add safe.directory '*'
 ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 # cmake-3.18.4 from pip
 RUN yum install -y python3-pip && \
    python3 -mpip install cmake==3.18.4 && \
    ln -s /usr/local/bin/cmake /usr/bin/cmake3
 RUN rm -rf /usr/local/cuda-*
 # openssl: builder stage; only /opt/openssl is copied into the final image.
 FROM base as openssl
 ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
 FROM base as patchelf
 # Install patchelf
 ADD ./common/install_patchelf.sh install_patchelf.sh
 RUN bash ./install_patchelf.sh && rm install_patchelf.sh && cp $(which patchelf) /patchelf
 FROM base as conda
 # Install Anaconda
 ADD ./common/install_conda_docker.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
 # Install CUDA
 # Common parent of the per-version cudaX.Y stages below; it only stages the
 # install scripts and environment, the actual install happens in each child.
 FROM base as cuda
 ARG CUDA_VERSION=12.6
 RUN rm -rf /usr/local/cuda-*
 ADD ./common/install_cuda.sh install_cuda.sh
 COPY ./common/install_nccl.sh install_nccl.sh
 COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
 COPY ./common/install_cusparselt.sh install_cusparselt.sh
 ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
 # Preserve CUDA_VERSION for the builds
 ENV CUDA_VERSION=${CUDA_VERSION}
 # Make things in our path by default
 ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:$PATH
 FROM cuda as cuda12.6
 RUN bash ./install_cuda.sh 12.6
 ENV DESIRED_CUDA=12.6
 FROM cuda as cuda12.8
 RUN bash ./install_cuda.sh 12.8
 ENV DESIRED_CUDA=12.8
 FROM cuda as cuda12.9
 RUN bash ./install_cuda.sh 12.9
 ENV DESIRED_CUDA=12.9
 FROM cuda as cuda13.0
 RUN bash ./install_cuda.sh 13.0
 ENV DESIRED_CUDA=13.0
 # rocm: starts from the external ROCm image rather than from "base".
 FROM ${ROCM_IMAGE} as rocm
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
 ADD ./common/install_mkl.sh install_mkl.sh
 RUN bash ./install_mkl.sh && rm install_mkl.sh
 ENV MKLROOT /opt/intel
 # Install MNIST test data
 FROM base as mnist
 ADD ./common/install_mnist.sh install_mnist.sh
 RUN bash ./install_mnist.sh
 # all_cuda: "base" plus every CUDA toolkit built above, side by side.
 FROM base as all_cuda
 COPY --from=cuda12.6  /usr/local/cuda-12.6 /usr/local/cuda-12.6
 COPY --from=cuda12.8  /usr/local/cuda-12.8 /usr/local/cuda-12.8
 COPY --from=cuda12.9  /usr/local/cuda-12.9 /usr/local/cuda-12.9
 COPY --from=cuda13.0  /usr/local/cuda-13.0 /usr/local/cuda-13.0
 # Final step
 FROM ${BASE_TARGET} as final
 COPY --from=openssl            /opt/openssl           /opt/openssl
 COPY --from=patchelf           /patchelf              /usr/local/bin/patchelf
 COPY --from=conda              /opt/conda             /opt/conda
 # Add jni.h for java host build.
 COPY ./common/install_jni.sh install_jni.sh
 COPY ./java/jni.h jni.h
 RUN bash ./install_jni.sh && rm install_jni.sh
 ENV PATH /opt/conda/bin:$PATH
 # NOTE(review): DEVTOOLSET_VERSION is declared in the "base" stage only; confirm
 # it is defined here when BASE_TARGET is a stage not derived from "base" (rocm).
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 COPY --from=mnist  /usr/local/mnist /usr/local/mnist
 RUN rm -rf /usr/local/cuda
 # Let non-root users write to /usr/local, the conda config and the conda tree.
 RUN chmod o+rw /usr/local
 RUN touch /.condarc && \
    chmod o+rw /.condarc && \
    chmod -R o+rw /opt/conda
--- a/.ci/docker/almalinux/build.sh
+++ b/.ci/docker/almalinux/build.sh
@ -1,76 +0,0 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline
 #
 # Usage: build.sh IMAGENAME:ARCHTAG [extra docker build arguments...]
 # Builds the "final" target of .ci/docker/almalinux/Dockerfile for the
 # architecture named by ARCHTAG (cpu, cudaX.Y or rocmX.Y[.Z]).
 set -exou pipefail
 # Default to an empty string so that, under "set -u", a missing argument reaches
 # the usage message below instead of aborting with "unbound variable".
 image="${1:-}"
 if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGENAME:ARCHTAG"
  exit 1
 fi
 # Only shift once the argument is known to exist; the rest is passed to docker.
 shift
 # Go from imagename:tag to tag
 DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
 CUDA_VERSION=""
 ROCM_VERSION=""
 EXTRA_BUILD_ARGS=""
 if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
    # extract cuda version from image name and tag.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
    CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
    EXTRA_BUILD_ARGS="--build-arg CUDA_VERSION=${CUDA_VERSION}"
 elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
    # extract rocm version from image name and tag.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
    ROCM_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
    EXTRA_BUILD_ARGS="--build-arg ROCM_IMAGE=rocm/dev-almalinux-8:${ROCM_VERSION}-complete"
 fi
 # Pick the Dockerfile stage that the final image is based on.
 case ${DOCKER_TAG_PREFIX} in
  cpu)
    BASE_TARGET=base
    ;;
  cuda*)
    BASE_TARGET=cuda${CUDA_VERSION}
    ;;
  rocm*)
    BASE_TARGET=rocm
    PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
    # add gfx950, gfx115x conditionally starting in ROCm 7.0
    if [[ "$ROCM_VERSION" == *"7.0"* ]]; then
        PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
    fi
    EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
    ;;
  *)
    echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
    exit 1
    ;;
 esac
 # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
 # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
 sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
 sudo systemctl daemon-reload
 sudo systemctl restart docker
 export DOCKER_BUILDKIT=1
 TOPDIR=$(git rev-parse --show-toplevel)
 # Throwaway lowercase tag used to refer to the image in the check below.
 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
 # EXTRA_BUILD_ARGS is deliberately unquoted: it holds several space-separated
 # options. "$@" is quoted so caller-supplied arguments are passed through intact.
 docker build \
  --target final \
  --progress plain \
  --build-arg "BASE_TARGET=${BASE_TARGET}" \
  --build-arg "DEVTOOLSET_VERSION=11" \
  ${EXTRA_BUILD_ARGS} \
  -t ${tmp_tag} \
  "$@" \
  -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
  ${TOPDIR}/.ci/docker/
 if [ -n "${CUDA_VERSION}" ]; then
  # Test that we're using the right CUDA compiler
  docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
 fi
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -1,445 +0,0 @@
 #!/bin/bash
 # The purpose of this script is to:
 # 1. Extract the set of parameters to be used for a docker build based on the provided image name.
 # 2. Run docker build with the parameters found in step 1.
 # 3. Run the built image and print out the expected and actual versions of packages installed.
 set -ex
 # The first positional argument is the image name. Validate it BEFORE calling
 # `shift`: with no arguments `shift` returns non-zero, and under `set -e` the
 # script would abort silently without ever printing the usage message.
 image="${1:-}"
 if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGE"
  exit 1
 fi
 # Remaining arguments are forwarded verbatim to `docker build` later on.
 shift
 function extract_version_from_image_name() {
  eval export $2=$(echo "${image}" | perl -n -e"/$1(\d+(\.\d+)?(\.\d+)?)/ && print \$1")
  if [ "x${!2}" = x ]; then
    echo "variable '$2' not correctly parsed from image='$image'"
    exit 1
  fi
 }
 # Split the global $image on '-' and, for every "<name><version>" field,
 # export <NAME>_VERSION via extract_version_from_image_name.
 # Fields without a trailing version (e.g. "pytorch", "linux") are skipped.
 function extract_all_from_image_name() {
  # Locals keep the loop variables out of the caller's scope. `read -a` splits
  # on '-' without the pathname expansion an unquoted `($image)` would undergo.
  local -a parts
  local part name vername
  IFS="-" read -r -a parts <<< "${image}"
  for part in "${parts[@]}"; do
    name=$(echo "${part}" | perl -n -e"/([a-zA-Z]+)\d+(\.\d+)?(\.\d+)?/ && print \$1")
    vername="${name^^}_VERSION"
    # "py" is the odd one out, needs this special case
    if [ "x${name}" = xpy ]; then
      vername=ANACONDA_PYTHON_VERSION
    fi
    # skip non-conforming fields such as "pytorch", "linux" or "bionic" without version string
    if [ -n "${name}" ]; then
      extract_version_from_image_name "${name}" "${vername}"
    fi
  done
 }
 # Use the same pre-built XLA test image from PyTorch/XLA
 case "$image" in
  *xla*)
    echo "Using pre-built XLA test image..."
    exit 0
    ;;
 esac
 # Derive the Ubuntu release from the codename, or from an explicit
 # "ubuntuNN.NN" field in the image name; first match wins.
 case "$image" in
  *-jammy*) UBUNTU_VERSION=22.04 ;;
  *-noble*) UBUNTU_VERSION=24.04 ;;
  *ubuntu*) extract_version_from_image_name ubuntu UBUNTU_VERSION ;;
 esac
 if [ -z "${UBUNTU_VERSION}" ]; then
  echo "Unable to derive operating system base..."
  exit 1
 fi
 OS="ubuntu"
 # Pick the Dockerfile; the patterns are checked in order, so the more
 # specific "cuda...linter" must stay ahead of plain "linter".
 case "$image" in
  *rocm*)
    DOCKERFILE="${OS}-rocm/Dockerfile"
    ;;
  *xpu*)
    DOCKERFILE="${OS}-xpu/Dockerfile"
    ;;
  *cuda*linter*)
    # Use a separate Dockerfile for linter to keep a small image size
    DOCKERFILE="linter-cuda/Dockerfile"
    ;;
  *linter*)
    # Use a separate Dockerfile for linter to keep a small image size
    DOCKERFILE="linter/Dockerfile"
    ;;
  *riscv*)
    # Use RISC-V specific Dockerfile
    DOCKERFILE="ubuntu-cross-riscv/Dockerfile"
    ;;
  *)
    DOCKERFILE="${OS}/Dockerfile"
    ;;
 esac
 # UCX/UCC commits; ROCm images use their own pair.
 case "$image" in
  *rocm*)
    _UCX_COMMIT=29831d319e6be55cb8c768ca61de335c934ca39e
    _UCC_COMMIT=9f4b242cbbd8b1462cbc732eb29316cdfa124b77
    ;;
  *)
    _UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152
    _UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96
    ;;
 esac
 tag=$(echo $image | awk -F':' '{print $2}')
 # It's annoying to rename jobs every time you want to rewrite a
 # configuration, so we hardcode everything here rather than do it
 # from scratch
 case "$tag" in
  pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11)
    CUDA_VERSION=12.4
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    TRITON=yes
    ;;
  pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11)
    CUDA_VERSION=12.8.1
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    TRITON=yes
    INSTALL_MINGW=yes
    ;;
  pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
    CUDA_VERSION=13.0.0
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    TRITON=yes
    ;;
  pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
    CUDA_VERSION=12.8.1
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    TRITON=yes
    INDUCTOR_BENCHMARKS=yes
    ;;
  pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm)
    CUDA_VERSION=12.8.1
    ANACONDA_PYTHON_VERSION=3.12
    GCC_VERSION=11
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    TRITON=yes
    ;;
  pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9)
    CUDA_VERSION=12.8.1
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=9
    VISION=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-clang12-onnx)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=12
    VISION=yes
    ONNX=yes
    ;;
  pytorch-linux-jammy-py3.10-clang12)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=12
    VISION=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
    if [[ $tag =~ "jammy" ]]; then
      ANACONDA_PYTHON_VERSION=3.10
    else
      ANACONDA_PYTHON_VERSION=3.12
    fi
    GCC_VERSION=11
    VISION=yes
    ROCM_VERSION=7.0
    NINJA_VERSION=1.9.0
    TRITON=yes
    KATEX=yes
    UCX_COMMIT=${_UCX_COMMIT}
    UCC_COMMIT=${_UCC_COMMIT}
    PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950;gfx1100"
    if [[ $tag =~ "benchmarks" ]]; then
      INDUCTOR_BENCHMARKS=yes
    fi
    ;;
  pytorch-linux-jammy-xpu-n-1-py3)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    XPU_VERSION=2025.1
    NINJA_VERSION=1.9.0
    TRITON=yes
    ;;
  pytorch-linux-jammy-xpu-n-py3)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    XPU_VERSION=2025.2
    NINJA_VERSION=1.9.0
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    KATEX=yes
    TRITON=yes
    DOCS=yes
    INDUCTOR_BENCHMARKS=yes
    ;;
  pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12)
    ANACONDA_PYTHON_VERSION=3.10
    CUDA_VERSION=12.8.1
    CLANG_VERSION=12
    VISION=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3-clang18-asan)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=18
    VISION=yes
    ;;
  pytorch-linux-jammy-py3.10-gcc11)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    VISION=yes
    KATEX=yes
    TRITON=yes
    DOCS=yes
    UNINSTALL_DILL=yes
    ;;
  pytorch-linux-jammy-py3-clang12-executorch)
    ANACONDA_PYTHON_VERSION=3.10
    CLANG_VERSION=12
    EXECUTORCH=yes
    ;;
  pytorch-linux-jammy-py3.12-halide)
    CUDA_VERSION=12.6
    ANACONDA_PYTHON_VERSION=3.12
    GCC_VERSION=11
    HALIDE=yes
    TRITON=yes
    ;;
  pytorch-linux-jammy-py3.12-triton-cpu)
    CUDA_VERSION=12.6
    ANACONDA_PYTHON_VERSION=3.12
    GCC_VERSION=11
    TRITON_CPU=yes
    ;;
  pytorch-linux-jammy-linter)
    PYTHON_VERSION=3.10
    ;;
  pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
    PYTHON_VERSION=3.10
    CUDA_VERSION=12.8.1
    ;;
  pytorch-linux-jammy-aarch64-py3.10-gcc11)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    ACL=yes
    VISION=yes
    OPENBLAS=yes
    # snadampal: skipping llvm src build install because the current version
    # from pytorch/llvm:9.0.1 is x86 specific
    SKIP_LLVM_SRC_BUILD_INSTALL=yes
    ;;
  pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks)
    ANACONDA_PYTHON_VERSION=3.10
    GCC_VERSION=11
    ACL=yes
    VISION=yes
    OPENBLAS=yes
    # snadampal: skipping llvm src build install because the current version
    # from pytorch/llvm:9.0.1 is x86 specific
    SKIP_LLVM_SRC_BUILD_INSTALL=yes
    INDUCTOR_BENCHMARKS=yes
    ;;
  pytorch-linux-noble-riscv64-py3.12-gcc14)
    GCC_VERSION=14
    ;;
  *)
    # Catch-all for builds that are not hardcoded: every version is parsed
    # out of the image name instead.
    echo "image '$image' did not match an existing build configuration"
    VISION=yes
    for _spec in py:ANACONDA_PYTHON_VERSION cuda:CUDA_VERSION; do
      if [[ "$image" == *"${_spec%%:*}"* ]]; then
        extract_version_from_image_name "${_spec%%:*}" "${_spec#*:}"
      fi
    done
    if [[ "$image" == *rocm* ]]; then
      extract_version_from_image_name rocm ROCM_VERSION
      NINJA_VERSION=1.9.0
      TRITON=yes
    fi
    # Must come after the ROCm block so centos7 overrides its Ninja version.
    if [[ "$image" == *centos7* ]]; then
      NINJA_VERSION=1.10.2
    fi
    for _spec in gcc:GCC_VERSION clang:CLANG_VERSION devtoolset:DEVTOOLSET_VERSION glibc:GLIBC_VERSION; do
      if [[ "$image" == *"${_spec%%:*}"* ]]; then
        extract_version_from_image_name "${_spec%%:*}" "${_spec#*:}"
      fi
    done
    unset _spec
  ;;
 esac
 # Random lower-case name used as a temporary local tag for the built image.
 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
 no_cache_flag=""
 progress_flag=""
 # Do not use cache and progress=plain when in CI
 if [[ -n "${CI:-}" ]]; then
  no_cache_flag="--no-cache"
  progress_flag="--progress=plain"
 fi
 # Build image
 # no_cache_flag / progress_flag stay unquoted on purpose so that an empty
 # value disappears instead of being passed to docker as an empty argument.
 # The -f path is quoted so the dirname result is never word-split or globbed.
 docker build \
       ${no_cache_flag} \
       ${progress_flag} \
       --build-arg "BUILD_ENVIRONMENT=${image}" \
       --build-arg "LLVMDEV=${LLVMDEV:-}" \
       --build-arg "VISION=${VISION:-}" \
       --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
       --build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
       --build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
       --build-arg "CLANG_VERSION=${CLANG_VERSION}" \
       --build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
       --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
       --build-arg "GCC_VERSION=${GCC_VERSION}" \
       --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
       --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
       --build-arg "KATEX=${KATEX:-}" \
       --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" \
       --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
       --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
       --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
       --build-arg "TRITON=${TRITON}" \
       --build-arg "TRITON_CPU=${TRITON_CPU}" \
       --build-arg "ONNX=${ONNX}" \
       --build-arg "DOCS=${DOCS}" \
       --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
       --build-arg "EXECUTORCH=${EXECUTORCH}" \
       --build-arg "HALIDE=${HALIDE}" \
       --build-arg "XPU_VERSION=${XPU_VERSION}" \
       --build-arg "UNINSTALL_DILL=${UNINSTALL_DILL}" \
       --build-arg "ACL=${ACL:-}" \
       --build-arg "OPENBLAS=${OPENBLAS:-}" \
       --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
       --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
       --build-arg "INSTALL_MINGW=${INSTALL_MINGW:-}" \
       -f "$(dirname "${DOCKERFILE}")/Dockerfile" \
       -t "$tmp_tag" \
       "$@" \
       .
 # NVIDIA dockers for RC releases use tag names like `11.0-cudnn9-devel-ubuntu18.04-rc`,
 # in which case UBUNTU_VERSION is `18.04-rc` so that the Dockerfile can find
 # the correct base image. Drop the `-rc` suffix here so the checks below
 # compare against the plain release number (e.g. "18.04").
 UBUNTU_VERSION="${UBUNTU_VERSION%-rc}"
 # Run a command in a throwaway container created from the image just built.
 drun() {
  docker run --rm "$tmp_tag" "$@"
 }
 # Each check below prints the expected value and the tool's real output,
 # then aborts, when the image does not contain what was requested.
 if [[ "$OS" == "ubuntu" ]]; then
  if ! drun lsb_release -a 2>&1 | grep -qF Ubuntu; then
    echo "OS=ubuntu, but:"
    drun lsb_release -a
    exit 1
  fi
  if ! drun lsb_release -a 2>&1 | grep -qF "$UBUNTU_VERSION"; then
    echo "UBUNTU_VERSION=$UBUNTU_VERSION, but:"
    drun lsb_release -a
    exit 1
  fi
 fi
 if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  if ! drun python --version 2>&1 | grep -qF "Python $ANACONDA_PYTHON_VERSION"; then
    echo "ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION, but:"
    drun python --version
    exit 1
  fi
 fi
 if [ -n "$GCC_VERSION" ]; then
  if [[ "$image" == *riscv* ]]; then
    # Check RISC-V cross-compilation toolchain version
    gcc_binary="riscv64-linux-gnu-gcc-${GCC_VERSION}"
    gcc_label="RISC-V GCC_VERSION"
  else
    gcc_binary="gcc"
    gcc_label="GCC_VERSION"
  fi
  if ! drun "$gcc_binary" --version 2>&1 | grep -q " $GCC_VERSION\\W"; then
    echo "${gcc_label}=$GCC_VERSION, but:"
    drun "$gcc_binary" --version
    exit 1
  fi
 fi
 if [ -n "$CLANG_VERSION" ]; then
  if ! drun clang --version 2>&1 | grep -qF "clang version $CLANG_VERSION"; then
    echo "CLANG_VERSION=$CLANG_VERSION, but:"
    drun clang --version
    exit 1
  fi
 fi
 if [ -n "$KATEX" ]; then
  if ! drun katex --version; then
    echo "KATEX=$KATEX, but:"
    drun katex --version
    exit 1
  fi
 fi
 # Triton must be importable exactly when TRITON or TRITON_CPU was requested.
 if drun python -c "import triton" > /dev/null 2>&1; then
  HAS_TRITON="yes"
 else
  HAS_TRITON="no"
 fi
 if [[ -z "$TRITON" && -z "$TRITON_CPU" ]]; then
  if [ "$HAS_TRITON" = "yes" ]; then
    echo "expecting triton to not be installed, but it is"
    exit 1
  fi
 elif [ "$HAS_TRITON" = "no" ]; then
  echo "expecting triton to be installed, but it is not"
  exit 1
 fi
--- a/.ci/docker/centos-rocm/Dockerfile
+++ b/.ci/docker/centos-rocm/Dockerfile
@ -1,109 +0,0 @@
 # CentOS + ROCm CI image. Every optional component is gated on a build
 # argument that the docker build script passes in. ENV instructions use the
 # `key=value` form; the whitespace-separated `ENV key value` form is a
 # legacy syntax that Docker discourages.
 ARG CENTOS_VERSION
 FROM centos:${CENTOS_VERSION}
 # Re-declare after FROM so the argument is visible inside the build stage.
 ARG CENTOS_VERSION
 # Set AMD gpu targets to build for
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
 # Install required packages to build Caffe2
 # Install common dependencies (so that this step can be cached separately)
 COPY ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh
 # Update CentOS git version
 RUN yum -y remove git
 RUN yum -y remove git-*
 RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
    sed -i 's/packages.endpoint/packages.endpointdev/' /etc/yum.repos.d/endpoint.repo
 RUN yum install -y git
 # Install devtoolset
 ARG DEVTOOLSET_VERSION
 COPY ./common/install_devtoolset.sh install_devtoolset.sh
 RUN bash ./install_devtoolset.sh && rm install_devtoolset.sh
 ENV BASH_ENV="/etc/profile"
 # (optional) Install non-default glibc version
 ARG GLIBC_VERSION
 COPY ./common/install_glibc.sh install_glibc.sh
 RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
 RUN rm install_glibc.sh
 # Install user
 COPY ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh
 # Install conda and other packages (e.g., numpy, pytest)
 ARG ANACONDA_PYTHON_VERSION
 ARG BUILD_ENVIRONMENT
 ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
 ENV PATH=/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
 COPY requirements-ci.txt /opt/conda/requirements-ci.txt
 COPY ./common/install_conda.sh install_conda.sh
 COPY ./common/common_utils.sh common_utils.sh
 RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
 # (optional) Install vision packages like OpenCV
 ARG VISION
 COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
 RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
 RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
 ENV INSTALLED_VISION=${VISION}
 # Install rocm
 ARG ROCM_VERSION
 RUN mkdir ci_commit_pins
 COPY ./common/common_utils.sh common_utils.sh
 COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
 COPY ./common/install_rocm.sh install_rocm.sh
 RUN bash ./install_rocm.sh
 RUN rm install_rocm.sh common_utils.sh
 RUN rm -r ci_commit_pins
 COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
 RUN rm install_rocm_magma.sh
 COPY ./common/install_amdsmi.sh install_amdsmi.sh
 RUN bash ./install_amdsmi.sh
 RUN rm install_amdsmi.sh
 # Each line prepends to PATH, so the last directory listed is searched first.
 ENV PATH=/opt/rocm/bin:$PATH
 ENV PATH=/opt/rocm/hcc/bin:$PATH
 ENV PATH=/opt/rocm/hip/bin:$PATH
 ENV PATH=/opt/rocm/opencl/bin:$PATH
 ENV PATH=/opt/rocm/llvm/bin:$PATH
 ENV MAGMA_HOME=/opt/rocm/magma
 ENV LANG=en_US.utf8
 ENV LC_ALL=en_US.utf8
 # (optional) Install non-default Ninja version
 ARG NINJA_VERSION
 COPY ./common/install_ninja.sh install_ninja.sh
 RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
 RUN rm install_ninja.sh
 ARG TRITON
 # Install triton, this needs to be done before sccache because the latter will
 # try to reach out to S3, which docker build runners don't have access
 ENV CMAKE_C_COMPILER=cc
 ENV CMAKE_CXX_COMPILER=c++
 COPY ./common/install_triton.sh install_triton.sh
 COPY ./common/common_utils.sh common_utils.sh
 COPY ci_commit_pins/triton.txt triton.txt
 COPY triton_version.txt triton_version.txt
 RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
 RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
 # Install ccache/sccache (do this last, so we get priority in PATH)
 COPY ./common/install_cache.sh install_cache.sh
 ENV PATH=/opt/cache/bin:$PATH
 RUN bash ./install_cache.sh && rm install_cache.sh
 # Include BUILD_ENVIRONMENT environment variable in image
 ARG BUILD_ENVIRONMENT
 ENV BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}
 USER jenkins
 CMD ["bash"]
--- a/.ci/docker/ci_commit_pins/executorch.txt
+++ b/.ci/docker/ci_commit_pins/executorch.txt
@ -1 +0,0 @@
 deb42f2a8e48f5032b4a98ee781a15fa87a157cf
--- a/.ci/docker/ci_commit_pins/halide.txt
+++ b/.ci/docker/ci_commit_pins/halide.txt
@ -1 +0,0 @@
 461c12871f336fe6f57b55d6a297f13ef209161b
--- a/.ci/docker/ci_commit_pins/huggingface-requirements.txt
+++ b/.ci/docker/ci_commit_pins/huggingface-requirements.txt
@ -1,2 +0,0 @@
 transformers==4.56.0
 soxr==0.5.0
--- a/.ci/docker/ci_commit_pins/nccl-cu11.txt
+++ b/.ci/docker/ci_commit_pins/nccl-cu11.txt
@ -1 +0,0 @@
 v2.21.5-1
--- a/.ci/docker/ci_commit_pins/nccl-cu12.txt
+++ b/.ci/docker/ci_commit_pins/nccl-cu12.txt
@ -1 +0,0 @@
 v2.27.5-1
--- a/.ci/docker/ci_commit_pins/nccl-cu13.txt
+++ b/.ci/docker/ci_commit_pins/nccl-cu13.txt
@ -1 +0,0 @@
 v2.27.7-1
--- a/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
+++ b/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
@ -1 +0,0 @@
 7fe50dc3da2069d6645d9deb8c017a876472a977
--- a/.ci/docker/ci_commit_pins/timm.txt
+++ b/.ci/docker/ci_commit_pins/timm.txt
@ -1 +0,0 @@
 5d535d7a2d4b435b1b5c1177fd8f04a12b942b9a
--- a/.ci/docker/ci_commit_pins/torchbench.txt
+++ b/.ci/docker/ci_commit_pins/torchbench.txt
@ -1 +0,0 @@
 74a23feff57432129df84d8099e622773cf77925
--- a/.ci/docker/ci_commit_pins/triton-cpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-cpu.txt
@ -1 +0,0 @@
 c7711371cace304afe265c1ffa906415ab82fc66
--- a/.ci/docker/ci_commit_pins/triton-xpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-xpu.txt
@ -1 +0,0 @@
 1b0418a9a454b2b93ab8d71f40e59d2297157fae
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@ -1 +0,0 @@
 7416ffcb92cdbe98d9f97e4e6f95247e46dfc9fd
--- a/.ci/docker/common/cache_vision_models.sh
+++ b/.ci/docker/common/cache_vision_models.sh
@ -1,18 +0,0 @@
 #!/bin/bash
 # Pre-download the torchvision test models: install nightly CPU torch and
 # torchvision, run a small script that instantiates the pretrained models,
 # then uninstall both packages again.
 set -ex
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 # Cache the test models at ~/.cache/torch/hub/
 IMPORT_SCRIPT_FILENAME="/tmp/torchvision_import_script.py"
 # NB: the `>` redirection is performed by this shell, not inside as_jenkins;
 # only the `echo` itself goes through the as_jenkins wrapper.
 as_jenkins echo 'import torchvision; torchvision.models.mobilenet_v2(pretrained=True); torchvision.models.mobilenet_v3_large(pretrained=True);' > "${IMPORT_SCRIPT_FILENAME}"
 pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
 # Very weird quoting behavior here https://github.com/conda/conda/issues/10972,
 # so echo the command to a file and run the file instead
 conda_run python "${IMPORT_SCRIPT_FILENAME}"
 # Cleaning up
 conda_run pip uninstall -y torch torchvision
 # Best effort: a failure to remove the helper script must not fail the build.
 rm "${IMPORT_SCRIPT_FILENAME}" || true
--- a/.ci/docker/common/common_utils.sh
+++ b/.ci/docker/common/common_utils.sh
@ -1,40 +0,0 @@
 #!/bin/bash
 # Work around bug where devtoolset replaces sudo and breaks it:
 # use the absolute /bin/sudo whenever DEVTOOLSET_VERSION is set.
 if [ -z "$DEVTOOLSET_VERSION" ]; then
  export SUDO=sudo
 else
  export SUDO=/bin/sudo
 fi
 # Run the given command line as the `jenkins` user through $SUDO.
 # NOTE(review): arguments are forwarded with an unquoted $*, so they are
 # re-split on whitespace and glob-expanded; callers appear to rely on passing
 # plain words only - confirm before switching to "$@".
 as_jenkins() {
  # NB: unsetting the environment variables works around a conda bug
  # https://github.com/conda/conda/issues/6576
  # NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
  # NB: This must be run from a directory that jenkins has access to,
  # works around https://github.com/conda/conda-package-handling/pull/34
  $SUDO -E -H -u jenkins \
    env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER \
    env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" \
    $*
 }
 # Install conda packages into the py_$ANACONDA_PYTHON_VERSION environment.
 # Usage: conda_install pkg1 pkg2 ... [-c channel]
 conda_install() {
  # Passing python=<version> explicitly keeps the solver from upgrading or
  # downgrading the interpreter while it installs the requested packages.
  local target_env="py_$ANACONDA_PYTHON_VERSION"
  as_jenkins conda install -q -n $target_env -y python="$ANACONDA_PYTHON_VERSION" $*
 }
 # Same as conda_install, but resolves packages from the conda-forge channel.
 conda_install_through_forge() {
  local target_env="py_$ANACONDA_PYTHON_VERSION"
  as_jenkins conda install -c conda-forge -q -n $target_env -y python="$ANACONDA_PYTHON_VERSION" $*
 }
 # Run a command inside the py_$ANACONDA_PYTHON_VERSION conda environment,
 # with --no-capture-output passed to `conda run`.
 conda_run() {
  local target_env="py_$ANACONDA_PYTHON_VERSION"
  as_jenkins conda run -n $target_env --no-capture-output $*
 }
 # pip-install packages inside the py_$ANACONDA_PYTHON_VERSION conda
 # environment, with pip's progress bar turned off.
 pip_install() {
  local target_env="py_$ANACONDA_PYTHON_VERSION"
  as_jenkins conda run -n $target_env pip install --progress-bar off $*
 }
 # Print the contents of "<$1>.txt", i.e. the pin stored for the given name.
 get_pinned_commit() {
  local pin_file="${1}.txt"
  cat "${pin_file}"
 }
--- a/.ci/docker/common/install_acl.sh
+++ b/.ci/docker/common/install_acl.sh
@ -1,27 +0,0 @@
 #!/bin/bash
 # Script used only in CD pipeline
 set -eux
 # Build Arm Compute Library from source and copy the tree to /acl.
 ACL_VERSION=${ACL_VERSION:-"v25.02"}
 ACL_INSTALL_DIR="/acl"
 ACL_CHECKOUT_DIR="ComputeLibrary"
 # Clone ACL
 git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules
 # Build with scons
 ACL_SCONS_ARGS=(
  -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0
  os=linux arch=armv8a build=native multi_isa=1
  fixed_format_kernels=1 openmp=1 cppthreads=0
 )
 pushd $ACL_CHECKOUT_DIR
 scons "${ACL_SCONS_ARGS[@]}"
 popd
 # Install ACL
 sudo mkdir -p ${ACL_INSTALL_DIR}
 for d in arm_compute include utils support src build; do
  sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d}
 done
 rm -rf $ACL_CHECKOUT_DIR
--- a/.ci/docker/common/install_amdsmi.sh
+++ b/.ci/docker/common/install_amdsmi.sh
@ -1,5 +0,0 @@
 #!/bin/bash
 set -ex
 # pip-install the package found in /opt/rocm/share/amd_smi.
 # The `&&` keeps pip from running in the wrong directory if the cd fails.
 cd /opt/rocm/share/amd_smi && pip install .
--- a/.ci/docker/common/install_base.sh
+++ b/.ci/docker/common/install_base.sh
@ -1,151 +0,0 @@
 #!/bin/bash
 set -ex
 # Install the common apt packages for Ubuntu-based images.
 # Reads UBUNTU_VERSION (selects the pinned cmake package) and CLANG_VERSION
 # (selects the matching libomp dev package, if any).
 install_ubuntu() {
  # NVIDIA dockers for RC releases use tag names like `11.0-cudnn9-devel-ubuntu18.04-rc`,
  # for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
  # find the correct image. As a result, here we have to check for
  #   "$UBUNTU_VERSION" == "18.04"*
  # instead of
  #   "$UBUNTU_VERSION" == "18.04"
  if [[ "$UBUNTU_VERSION" == "20.04"* ]]; then
    cmake3="cmake=3.16*"
    maybe_libiomp_dev=""
  elif [[ "$UBUNTU_VERSION" == "22.04"* ]]; then
    cmake3="cmake=3.22*"
    maybe_libiomp_dev=""
  elif [[ "$UBUNTU_VERSION" == "24.04"* ]]; then
    cmake3="cmake=3.28*"
    maybe_libiomp_dev=""
  else
    cmake3="cmake=3.5*"
    maybe_libiomp_dev="libiomp-dev"
  fi
  if [[ "$CLANG_VERSION" == 15 ]]; then
    maybe_libomp_dev="libomp-15-dev"
  elif [[ "$CLANG_VERSION" == 12 ]]; then
    maybe_libomp_dev="libomp-12-dev"
  elif [[ "$CLANG_VERSION" == 10 ]]; then
    maybe_libomp_dev="libomp-10-dev"
  else
    maybe_libomp_dev=""
  fi
  # Install common dependencies
  apt-get update
  # TODO: Some of these may not be necessary
  ccache_deps="asciidoc docbook-xml docbook-xsl xsltproc"
  deploy_deps="libffi-dev libbz2-dev libreadline-dev libncurses5-dev libncursesw5-dev libgdbm-dev libsqlite3-dev uuid-dev tk-dev"
  numpy_deps="gfortran"
  # The *_deps and maybe_* variables are expanded unquoted on purpose: they
  # hold zero or more package names that must become separate arguments.
  # (`vim` used to be listed twice here; the duplicate entry was dropped.)
  apt-get install -y --no-install-recommends \
    $ccache_deps \
    $numpy_deps \
    ${deploy_deps} \
    ${cmake3} \
    apt-transport-https \
    autoconf \
    automake \
    build-essential \
    ca-certificates \
    curl \
    git \
    libatlas-base-dev \
    libc6-dbg \
    ${maybe_libiomp_dev} \
    libyaml-dev \
    libz-dev \
    libjemalloc2 \
    libjpeg-dev \
    libasound2-dev \
    libsndfile-dev \
    ${maybe_libomp_dev} \
    software-properties-common \
    wget \
    sudo \
    vim \
    jq \
    libtool \
    unzip \
    gpg-agent \
    gdb \
    bc
  # Should resolve issues related to various apt package repository cert issues
  # see: https://github.com/pytorch/pytorch/issues/65931
  apt-get install -y libgnutls30
  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 }
 # Install the common yum packages for CentOS-based images.
 install_centos() {
  # Need EPEL for many packages we depend on.
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release
  ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt"
  numpy_deps="gcc-gfortran"
  local -a base_packages=(
    autoconf automake bzip2 cmake cmake3 curl
    gcc gcc-c++ gflags-devel git
    glibc-devel glibc-headers glog-devel
    libstdc++-devel libsndfile-devel
    make opencv-devel sudo wget vim unzip gdb
  )
  # The *_deps strings are expanded unquoted so each package name becomes its
  # own argument.
  yum install -y $ccache_deps $numpy_deps "${base_packages[@]}"
  # Cleanup
  yum clean all
  local yum_dir
  for yum_dir in /var/cache/yum /var/lib/yum/yumdb /var/lib/yum/history; do
    rm -rf "${yum_dir}"
  done
 }
 # Install base packages depending on the base OS
 # (ID is read from the ID= line of /etc/os-release, with quotes stripped).
 ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 if [ "$ID" = "ubuntu" ]; then
  install_ubuntu
 elif [ "$ID" = "centos" ]; then
  install_centos
 else
  echo "Unable to determine OS..."
  exit 1
 fi
 # Install Valgrind separately since the apt-get version is too old.
 mkdir valgrind_build && cd valgrind_build
 VALGRIND_VERSION=3.20.0
 wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
 tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
 cd valgrind-${VALGRIND_VERSION}
 ./configure --prefix=/usr/local
 # Leave two CPUs free, but never drop below one job: `make -j0` (or a
 # negative count) is rejected by make on machines with two or fewer CPUs.
 VALGRIND_BUILD_JOBS=$(( $(nproc) - 2 ))
 if (( VALGRIND_BUILD_JOBS < 1 )); then
  VALGRIND_BUILD_JOBS=1
 fi
 make -j"${VALGRIND_BUILD_JOBS}"
 sudo make install
 cd ../../
 rm -rf valgrind_build
 alias valgrind="/usr/local/bin/valgrind"
--- a/.ci/docker/common/install_cache.sh
+++ b/.ci/docker/common/install_cache.sh
@ -1,142 +0,0 @@
 #!/bin/bash
 set -ex
 # Build sccache v0.10.0 from source with cargo and install it into
 # /opt/cache/bin, then download an older prebuilt binary next to it as
 # sccache-0.2.14a. cargo/rustc are removed again once the build is done.
 install_ubuntu() {
  echo "Preparing to build sccache from source"
  apt-get update
  # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
  # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
  apt-get install -y cargo
  echo "Checking out sccache repo"
  git clone https://github.com/mozilla/sccache -b v0.10.0
  cd sccache
  echo "Building sccache"
  cargo build --release
  cp target/release/sccache /opt/cache/bin
  echo "Cleaning up"
  cd ..
  rm -rf sccache
  apt-get remove -y cargo rustc
  apt-get autoclean && apt-get clean
  echo "Downloading old sccache binary from S3 repo for PCH builds"
  curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache-0.2.14a
  chmod 755 /opt/cache/bin/sccache-0.2.14a
 }
 # Download a prebuilt sccache binary to /opt/cache/bin/sccache.
 # NOTE(review): nothing in this script calls this function (only
 # install_ubuntu is invoked below) - confirm whether it is still needed.
 install_binary() {
  echo "Downloading sccache binary from S3 repo"
  curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
 }
 mkdir -p /opt/cache/bin
 mkdir -p /opt/cache/lib
 sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
 export PATH="/opt/cache/bin:$PATH"
 # Setup compiler cache
 install_ubuntu
 chmod a+x /opt/cache/bin/sccache
 # Write an executable /bin/sh wrapper to /opt/cache/bin/$1 that runs the real
 # compiler named $1 through sccache.
 #   $1 - compiler name (e.g. cc, gcc, clang++)
 # In the heredocs, unescaped `$(which $1)` is expanded NOW, so the compiler's
 # absolute path is baked into the stub; escaped `\$...` is evaluated when the
 # stub runs. The stub calls the compiler directly, without sccache, when its
 # parent process is already sccache; the gcc stub also does so when `-E` is
 # among the arguments.
 function write_sccache_stub() {
  # Unset LD_PRELOAD for ps because of asan + ps issues
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
  if [ $1 == "gcc" ]; then
    # Do not call sccache recursively when dumping preprocessor argument
    # For some reason it's very important for the first cached nvcc invocation
    cat >"/opt/cache/bin/$1" <<EOF
 #!/bin/sh
 # sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively
 for arg in "\$@"; do
  if [ "\$arg" = "-E" ]; then
    exec $(which $1) "\$@"
  fi
 done
 if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache $(which $1) "\$@"
 else
  exec $(which $1) "\$@"
 fi
 EOF
  else
    cat >"/opt/cache/bin/$1" <<EOF
 #!/bin/sh
 if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache $(which $1) "\$@"
 else
  exec $(which $1) "\$@"
 fi
 EOF
  fi
  chmod a+x "/opt/cache/bin/$1"
 }
 write_sccache_stub cc
 write_sccache_stub c++
 write_sccache_stub gcc
 write_sccache_stub g++
 # NOTE: See specific ROCM_VERSION case below.
 if [ "x$ROCM_VERSION" = x ]; then
  write_sccache_stub clang
  write_sccache_stub clang++
 fi
 if [ -n "$CUDA_VERSION" ]; then
  # TODO: This is a workaround for the fact that PyTorch's FindCUDA
  # implementation cannot find nvcc if it is setup this way, because it
  # appears to search for the nvcc in PATH, and use its path to infer
  # where CUDA is installed.  Instead, we install an nvcc symlink outside
  # of the PATH, and set CUDA_NVCC_EXECUTABLE so that we make use of it.
  write_sccache_stub nvcc
  mv /opt/cache/bin/nvcc /opt/cache/lib/
 fi
 if [ -n "$ROCM_VERSION" ]; then
  # ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
  # hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
  # causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
  # directly under /opt/rocm while also preserving the original compiler names.
  # Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
  # Final link in symlink chain must point back to original directory.
  # Original compiler is moved one directory deeper. Wrapper replaces it.
  # Replace the compiler at path $1 with an sccache wrapper script.
  #   $1 - absolute path of the compiler to wrap
  # The original file is moved to "<dir>/original/<name>" (that directory must
  # already exist) and a /bin/sh stub that execs sccache on it is written in
  # its place.
  function write_sccache_stub_rocm() {
    # Locals keep these names (TOPDIR in particular) from leaking into the
    # rest of the script.
    local compiler_path="$1"
    local compiler_name compiler_dir wrapped_path
    compiler_name=$(basename "$compiler_path")
    compiler_dir=$(dirname "$compiler_path")
    wrapped_path="$compiler_dir/original/$compiler_name"
    mv "$compiler_path" "$wrapped_path"
    # The path is passed as a printf argument rather than embedded in the
    # format string, so `%` or backslashes in it are written out literally.
    printf '#!/bin/sh\nexec sccache %s "$@"' "$wrapped_path" >"$compiler_path"
    chmod a+x "$compiler_path"
  }
  if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
    # ROCm 3.3 or earlier.
    mkdir /opt/rocm/hcc/bin/original
    write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
    write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
    write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
    # Fix last link in symlink chain, clang points to versioned clang in prior dir
    pushd /opt/rocm/hcc/bin/original
    ln -s ../$(readlink clang)
    popd
  elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
    # ROCm 3.5 and beyond.
    mkdir /opt/rocm/llvm/bin/original
    write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
    write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
    # Fix last link in symlink chain, clang points to versioned clang in prior dir
    pushd /opt/rocm/llvm/bin/original
    ln -s ../$(readlink clang)
    popd
  else
    echo "Cannot find ROCm compiler."
    exit 1
  fi
 fi
--- a/.ci/docker/common/install_clang.sh
+++ b/.ci/docker/common/install_clang.sh
@ -1,45 +0,0 @@
 #!/bin/bash
 set -ex
 # Install clang/llvm $CLANG_VERSION from apt and make it the default
 # clang, clang++, cc and c++. Nothing happens when CLANG_VERSION is empty.
 if [ -n "$CLANG_VERSION" ]; then
  if [[ $UBUNTU_VERSION == 22.04 ]]; then
    # work around ubuntu apt-get conflicts
    sudo apt-get -y -f install
    # NOTE(review): the signing key is fetched with certificate checking disabled.
    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
    # Only clang 18 gets the apt.llvm.org repository added here.
    if [[ $CLANG_VERSION == 18 ]]; then
      apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
    fi
  fi
  sudo apt-get update
  # For clang >= 18 the matching libomp and compiler-rt dev packages are
  # installed as well (and without --no-install-recommends).
  if [[ $CLANG_VERSION -ge 18 ]]; then
    apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
  else
    apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
  fi
  # Install dev version of LLVM.
  if [ -n "$LLVMDEV" ]; then
    sudo apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"-dev
  fi
  # Use update-alternatives to make this version the default
  update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
  update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
  # Override cc/c++ to clang as well
  update-alternatives --install /usr/bin/cc cc /usr/bin/clang 50
  update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 50
  # clang's packaging is a little messed up (the runtime libs aren't
  # added into the linker path), so give it a little help
  # clang_lib is an array built from a glob; "$clang_lib" below expands to its
  # first element only, so a single directory is written to the conf file.
  clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
  echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf
  ldconfig
  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 fi
--- a/.ci/docker/common/install_conda.sh
+++ b/.ci/docker/common/install_conda.sh
@ -1,101 +0,0 @@
 #!/bin/bash
 # Optionally installs Miniforge into /opt/conda and creates the
 # py_$ANACONDA_PYTHON_VERSION conda environment.
 # The whole script is a no-op when ANACONDA_PYTHON_VERSION is empty.
 #
 # Other environment variables consulted below: BUILD_ENVIRONMENT,
 # CUDA_VERSION, UBUNTU_VERSION and DOCS.
 # as_jenkins, conda_install, conda_install_through_forge, conda_run and
 # pip_install are not defined in this script; presumably they come from the
 # sourced common_utils.sh - confirm there.
 set -ex
 # Optionally install conda
 if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
  BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"  # @lint-ignore
  CONDA_FILE="Miniforge3-Linux-$(uname -m).sh"
  MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
  # NOTE(review): MINOR_PYTHON_VERSION is not referenced again in this script.
  MINOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 2)
  # Only Python 3.x is accepted; any other major version aborts the script.
  case "$MAJOR_PYTHON_VERSION" in
    3);;
    *)
      echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
      exit 1
      ;;
  esac
  mkdir -p /opt/conda
  chown jenkins:jenkins /opt/conda
  # Resolve the directory holding this script so the helper file can be sourced.
  SCRIPT_FOLDER="$( cd "$(dirname "$0")" ; pwd -P )"
  source "${SCRIPT_FOLDER}/common_utils.sh"
  pushd /tmp
  wget -q "${BASE_URL}/${CONDA_FILE}"
  # NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
  as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
  popd
  # NB: Don't do this, rely on the rpath to get it right
  #echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
  #ldconfig
  # Prepend /opt/conda/bin to the PATH="..." entry in /etc/environment and to
  # the PATH of the current shell.
  sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
  export PATH="/opt/conda/bin:$PATH"
  # Ensure we run conda in a directory that jenkins has write access to
  pushd /opt/conda
  # Prevent conda from updating to 4.14.0, which causes docker build failures
  # See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
  # Uncomment the below when resolved to track the latest conda update
  # as_jenkins conda update -y -n base conda
  # Pick the sysroot package that matches the machine architecture.
  if [[ $(uname -m) == "aarch64" ]]; then
    export SYSROOT_DEP="sysroot_linux-aarch64=2.17"
  else
    export SYSROOT_DEP="sysroot_linux-64=2.17"
  fi
  # Install correct Python version
  # Also ensure sysroot is using a modern GLIBC to match system compilers
  as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
             python="$ANACONDA_PYTHON_VERSION" \
             ${SYSROOT_DEP}
  # libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
  # which is provided in libstdcxx 12 and up.
  conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge
  # Miniforge installer doesn't install sqlite by default
  if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
    conda_install sqlite
  fi
  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
  # MKL packages are skipped on aarch64.
  if [[ $(uname -m) != "aarch64" ]]; then
    pip_install mkl==2024.2.0
    pip_install mkl-static==2024.2.0
    pip_install mkl-include==2024.2.0
  fi
  # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
  # and libpython-static for torch deploy
  conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"
  # Magma package names are concatenation of CUDA major and minor ignoring revision
  # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
  # Magma is installed from a tarball in the ossci-linux bucket into the conda env
  if [ -n "$CUDA_VERSION" ]; then
    # Only the first two dot-separated fields (major.minor) are passed on.
    conda_run ${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION})
  fi
  if [[ "$UBUNTU_VERSION" == "24.04"* ]] ; then
    conda_install_through_forge libstdcxx-ng=14
  fi
  # Install some other packages, including those needed for Python test reporting
  pip_install -r /opt/conda/requirements-ci.txt
  if [ -n "$DOCS" ]; then
    apt-get update
    apt-get -y install expect-dev
    # We are currently building docs with python 3.8 (min support version)
    pip_install -r /opt/conda/requirements-docs.txt
  fi
  popd
 fi
--- a/.ci/docker/common/install_conda_docker.sh
+++ b/.ci/docker/common/install_conda_docker.sh
@ -1,20 +0,0 @@
 #!/bin/bash
 # CD-pipeline-only script: installs a pinned Miniconda into /opt/conda plus
 # the conda/pip packages needed there.
 set -ex
 # Anaconda
 # The installer is pinned because the latest anaconda uses openssl-3, which is
 # incompatible with all currently published git builds (those use
 # openssl-1.1.1, see https://anaconda.org/anaconda/git/files?version=2.40.1).
 MINICONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh
 # File name component of the URL, i.e. what wget stores in the current directory.
 miniconda_installer="${MINICONDA_URL##*/}"
 wget -q "${MINICONDA_URL}"
 # NB: bash is invoked explicitly per https://github.com/conda/conda/issues/10431
 bash "${miniconda_installer}" -b -p /opt/conda
 rm "${miniconda_installer}"
 PATH="/opt/conda/bin:${PATH}"
 export PATH
 # See https://github.com/pytorch/builder/issues/1473
 # conda stays pinned to 23.5.2, the last release compatible with openssl-1.1.1
 conda install -y \
    conda=23.5.2 \
    conda-build \
    anaconda-client \
    git \
    ninja
 # cmake has to match the minimum version supported by PyTorch (3.18);
 # 3.18.2 is the only 3.18 release available on anaconda
 /opt/conda/bin/pip3 install cmake==3.18.2
 conda remove -y --force patchelf
--- a/.ci/docker/common/install_cpython.sh
+++ b/.ci/docker/common/install_cpython.sh
@ -1,103 +0,0 @@
 #!/bin/bash
 # Script used only in CD pipeline
 # Builds several CPython releases from source. -u makes references to unset
 # variables fatal; -o pipefail makes a pipeline fail if any stage fails.
 set -uex -o pipefail
 # Base URLs for the CPython source tarballs and the pip bootstrap script.
 PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
 GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
 # Space-separated Python versions to build; a trailing "t" selects the
 # --disable-gil build. Each one is installed under
 # /opt/_internal/cpython-<version> and symlinked into /opt/python.
 # Can be overridden from the environment.
 CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t 3.14.0 3.14.0t"}
 # Abort the script when the given value is empty or missing.
 #   $1 - value to validate (callers pass an expanded variable)
 # Prints "required variable not defined" and exits with status 1 on failure.
 function check_var {
    # ${1:-} keeps this working under `set -u`: callers pass the value
    # unquoted, so an empty variable arrives as "no argument at all" and a
    # bare $1 would die with "unbound variable" before the message is printed.
    if [ -z "${1:-}" ]; then
        echo "required variable not defined"
        exit 1
    fi
 }
 # Configure, build and install one CPython release from an already
 # downloaded tarball, then bootstrap pip and publish the install under
 # /opt/python/<abi tag>.
 #   $1 - version string, e.g. 3.12.0; a trailing "t" (3.13.0t) selects
 #        the --disable-gil build
 #   $2 - name of the source directory produced by extracting the tarball
 # Expects Python-<$1>.tgz and get-pip.py in the current directory.
 # Honours WITH_OPENSSL (optional): passed to --with-openssl when set.
 function do_cpython_build {
    local py_ver=$1
    local py_folder=$2
    check_var $py_ver
    check_var $py_folder
    tar -xzf Python-$py_ver.tgz
    local additional_flags=""
    if [[ "$py_ver" == *"t" ]]; then
        additional_flags=" --disable-gil"
    fi
    pushd $py_folder
    local prefix="/opt/_internal/cpython-${py_ver}"
    mkdir -p ${prefix}/lib
    # A shared libpython is only built when patchelf is available, because
    # the interpreter's rpath is rewritten with patchelf after installation.
    if [[ -n $(which patchelf) ]]; then
        local shared_flags="--enable-shared"
    else
        local shared_flags="--disable-shared"
    fi
    # ${WITH_OPENSSL+x} is empty only when WITH_OPENSSL is unset (safe under set -u).
    if [[ -z  "${WITH_OPENSSL+x}" ]]; then
        local openssl_flags=""
    else
        local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto"
    fi
    # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} ${additional_flags} > /dev/null
    make -j40 > /dev/null
    make install > /dev/null
    if [[ "${shared_flags}" == "--enable-shared" ]]; then
        # Single quotes keep $ORIGIN literal so the dynamic loader expands it.
        patchelf --set-rpath '$ORIGIN/../lib' ${prefix}/bin/python3
    fi
    popd
    rm -rf $py_folder
    # Some python's install as bin/python3. Make them available as
    # bin/python.
    if [ -e ${prefix}/bin/python3 ]; then
        ln -s python3 ${prefix}/bin/python
    fi
    ${prefix}/bin/python get-pip.py
    if [ -e ${prefix}/bin/pip3 ] && [ ! -e ${prefix}/bin/pip ]; then
        ln -s pip3 ${prefix}/bin/pip
    fi
    # setuptools is installed explicitly: since python 3.12 it is required in
    # order to use distutils.
    # packaging is needed to create symlink since wheel no longer provides needed information
    ${prefix}/bin/pip install packaging==25.0 wheel==0.45.1 setuptools==80.9.0
    # Declare and assign separately: `local var=$(cmd)` returns the status of
    # `local`, which would hide a failing python call from `set -e` and lead
    # to a symlink with an empty ABI tag.
    local abi_tag
    abi_tag=$(${prefix}/bin/python -c "from packaging.tags import interpreter_name, interpreter_version; import sysconfig ; from sysconfig import get_config_var; print('{0}{1}-{0}{1}{2}'.format(interpreter_name(), interpreter_version(), 't' if sysconfig.get_config_var('Py_GIL_DISABLED') else ''))")
    ln -sf ${prefix} /opt/python/${abi_tag}
 }
 # Fetch the source tarball for one CPython version and hand it to
 # do_cpython_build; the tarball is deleted afterwards.
 #   $1 - version to build; a trailing "t" marks the nogil variant, which
 #        is built from the same upstream release without the "t"
 function build_cpython {
    local py_ver=$1
    check_var $py_ver
    # Upstream release name: the version with a trailing "t" (if any) removed.
    # It names both the download directory and the tarball on python.org.
    local release=${py_ver%t}
    local tarball=Python-$py_ver.tgz
    wget -q $PYTHON_DOWNLOAD_URL/$release/Python-$release.tgz -O $tarball
    do_cpython_build $py_ver Python-$release
    rm -f $tarball
 }
 # Build every CPython version passed as an argument.
 #   $@ - version strings (see build_cpython)
 # get-pip.py is downloaded once into the current directory, reused by each
 # build, and removed at the end.
 function build_cpythons {
    check_var $GET_PIP_URL
    curl -sLO $GET_PIP_URL
    # $@ is intentionally left unquoted here, so arguments are word-split.
    for py_ver in $@; do
        build_cpython $py_ver
    done
    rm -f get-pip.py
 }
 # Entry point: create the install roots, then build all requested versions.
 # /opt/_internal holds the real installs; /opt/python holds ABI-tag symlinks.
 mkdir -p /opt/python
 mkdir -p /opt/_internal
 # Unquoted on purpose: the space-separated list is split into one argument per version.
 build_cpythons $CPYTHON_VERSIONS
--- a/.ci/docker/common/install_cuda.sh
+++ b/.ci/docker/common/install_cuda.sh
@ -1,185 +0,0 @@
 #!/bin/bash
 set -ex
 # Work out which NVIDIA download flavour applies. TARGETARCH wins when it is
 # set; otherwise the machine architecture from `uname -m` is used.
 # amd64/x86_64 map to 'x86_64'; every other value maps to 'sbsa'.
 targetarch=${TARGETARCH:-$(uname -m)}
 case "${targetarch}" in
  amd64|x86_64)
    arch_path='x86_64'
    ;;
  *)
    arch_path='sbsa'
    ;;
 esac
 # NVSHMEM release passed to install_nvshmem by the per-CUDA install functions.
 NVSHMEM_VERSION=3.3.24
 # Download an NVIDIA CUDA runfile and silently install the toolkit from it.
 #   $1 - full CUDA version, e.g. 12.4.1 (used in the download URL)
 #   $2 - runfile base name without ".run"; "_sbsa" is appended when
 #        arch_path is 'sbsa'
 # Any existing /usr/local/cuda-<major.minor> and /usr/local/cuda are removed
 # first; afterwards /usr/local/cuda is a symlink to the new install.
 # NOTE: version, runfile and major_minor are not declared local, so they
 # remain set in the calling shell.
 function install_cuda {
  version=$1
  runfile=$2
  # Strip the last dot-separated component: 12.4.1 -> 12.4
  major_minor=${version%.*}
  rm -rf /usr/local/cuda-${major_minor} /usr/local/cuda
  if [[ ${arch_path} == 'sbsa' ]]; then
      runfile="${runfile}_sbsa"
  fi
  runfile="${runfile}.run"
  wget -q https://developer.download.nvidia.com/compute/cuda/${version}/local_installers/${runfile} -O ${runfile}
  chmod +x ${runfile}
  ./${runfile} --toolkit --silent
  rm -f ${runfile}
  rm -f /usr/local/cuda && ln -s /usr/local/cuda-${major_minor} /usr/local/cuda
 }
 # Download a cuDNN redistributable archive and copy its headers and
 # libraries into /usr/local/cuda.
 #   $1 - CUDA major version the archive was built for, e.g. 12
 #   $2 - cuDNN version, e.g. 9.1.0.70
 # Works in a temporary tmp_cudnn directory under the current directory and
 # removes it afterwards. The archive flavour follows the global arch_path.
 function install_cudnn {
  cuda_major_version=$1
  cudnn_version=$2
  mkdir tmp_cudnn && cd tmp_cudnn
  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  filepath="cudnn-linux-${arch_path}-${cudnn_version}_cuda${cuda_major_version}-archive"
  wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-${arch_path}/${filepath}.tar.xz
  tar xf ${filepath}.tar.xz
  cp -a ${filepath}/include/* /usr/local/cuda/include/
  cp -a ${filepath}/lib/* /usr/local/cuda/lib64/
  cd ..
  rm -rf tmp_cudnn
 }
 # Download an NVSHMEM redistributable archive and copy its headers and
 # libraries into /usr/local/cuda.
 #   $1 - CUDA major version the archive was built for, e.g. "12"
 #   $2 - NVSHMEM version, e.g. "3.3.9"
 # Works in a temporary tmp_nvshmem directory under the current directory and
 # removes it afterwards. The archive flavour follows the global arch_path.
 function install_nvshmem {
  cuda_major_version=$1      # e.g. "12"
  nvshmem_version=$2         # e.g. "3.3.9"
  # NOTE(review): dl_arch is not referenced again in this function;
  # presumably it is left over from an older archive naming scheme - confirm
  # before removing it.
  case "${arch_path}" in
    sbsa)
      dl_arch="aarch64"
      ;;
    x86_64)
      dl_arch="x64"
      ;;
    *)
      # Fall back to the detected arch_path; the previous "${arch}" was never
      # defined in this script and always expanded to an empty string.
      dl_arch="${arch_path}"
      ;;
  esac
  tmpdir="tmp_nvshmem"
  mkdir -p "${tmpdir}" && cd "${tmpdir}"
  # nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
  # This pattern is a lie as it is not consistent across versions, for 3.3.9 it was cuda_ver-arch-nvshmem-ver
  filename="libnvshmem-linux-${arch_path}-${nvshmem_version}_cuda${cuda_major_version}-archive"
  suffix=".tar.xz"
  url="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/linux-${arch_path}/${filename}${suffix}"
  # download, unpack, install
  wget -q "${url}"
  tar xf "${filename}${suffix}"
  cp -a "${filename}/include/"* /usr/local/cuda/include/
  cp -a "${filename}/lib/"*     /usr/local/cuda/lib64/
  # cleanup
  cd ..
  rm -rf "${tmpdir}"
  echo "nvSHMEM ${nvshmem_version} for CUDA ${cuda_major_version} (${arch_path}) installed."
 }
 # Install the CUDA 12.4 stack: toolkit 12.4.1, cuDNN 9.1.0.70, then NCCL and
 # cuSPARSELt through install_nccl.sh / install_cusparselt.sh, which are run
 # from the current directory with CUDA_VERSION=12.4. NVSHMEM is not installed
 # for this version.
 function install_124 {
  CUDNN_VERSION=9.1.0.70
  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.2"
  install_cuda 12.4.1 cuda_12.4.1_550.54.15_linux
  install_cudnn 12 $CUDNN_VERSION
  CUDA_VERSION=12.4 bash install_nccl.sh
  CUDA_VERSION=12.4 bash install_cusparselt.sh
  # Refresh the dynamic linker cache after the libraries were copied.
  ldconfig
 }
 # Install the CUDA 12.6 stack: toolkit 12.6.3, cuDNN 9.10.2.21, NVSHMEM
 # $NVSHMEM_VERSION, then NCCL and cuSPARSELt through install_nccl.sh /
 # install_cusparselt.sh, run from the current directory with CUDA_VERSION=12.6.
 function install_126 {
  CUDNN_VERSION=9.10.2.21
  echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
  install_cuda 12.6.3 cuda_12.6.3_560.35.05_linux
  install_cudnn 12 $CUDNN_VERSION
  install_nvshmem 12 $NVSHMEM_VERSION
  CUDA_VERSION=12.6 bash install_nccl.sh
  CUDA_VERSION=12.6 bash install_cusparselt.sh
  # Refresh the dynamic linker cache after the libraries were copied.
  ldconfig
 }
 # Install the CUDA 12.9 stack: toolkit 12.9.1, cuDNN 9.10.2.21, NVSHMEM
 # $NVSHMEM_VERSION, then NCCL and cuSPARSELt through install_nccl.sh /
 # install_cusparselt.sh, run from the current directory with CUDA_VERSION=12.9.
 function install_129 {
  CUDNN_VERSION=9.10.2.21
  echo "Installing CUDA 12.9.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
  # install CUDA 12.9.1 in the same container
  install_cuda 12.9.1 cuda_12.9.1_575.57.08_linux
  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  install_cudnn 12 $CUDNN_VERSION
  install_nvshmem 12 $NVSHMEM_VERSION
  CUDA_VERSION=12.9 bash install_nccl.sh
  CUDA_VERSION=12.9 bash install_cusparselt.sh
  # Refresh the dynamic linker cache after the libraries were copied.
  ldconfig
 }
 # Install the CUDA 12.8 stack: toolkit 12.8.1, cuDNN 9.8.0.87, NVSHMEM
 # $NVSHMEM_VERSION, then NCCL and cuSPARSELt through install_nccl.sh /
 # install_cusparselt.sh, run from the current directory with CUDA_VERSION=12.8.
 function install_128 {
  CUDNN_VERSION=9.8.0.87
  echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
  # install CUDA 12.8.1 in the same container
  install_cuda 12.8.1 cuda_12.8.1_570.124.06_linux
  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  install_cudnn 12 $CUDNN_VERSION
  install_nvshmem 12 $NVSHMEM_VERSION
  CUDA_VERSION=12.8 bash install_nccl.sh
  CUDA_VERSION=12.8 bash install_cusparselt.sh
  # Refresh the dynamic linker cache after the libraries were copied.
  ldconfig
 }
 # Install the CUDA 13.0 stack: toolkit 13.0.0, cuDNN 9.13.0.50, NVSHMEM
 # $NVSHMEM_VERSION, then NCCL and cuSPARSELt through install_nccl.sh /
 # install_cusparselt.sh, run from the current directory with CUDA_VERSION=13.0.
 function install_130 {
  CUDNN_VERSION=9.13.0.50
  # The banner names cuSparseLt 0.8.0 because install_cusparselt.sh selects
  # the 0.8.0.4 archive for CUDA 13 (0.7.1 is only used for CUDA 12.5-12.9).
  echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.8.0"
  # install CUDA 13.0 in the same container
  install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
  install_cudnn 13 $CUDNN_VERSION
  install_nvshmem 13 $NVSHMEM_VERSION
  CUDA_VERSION=13.0 bash install_nccl.sh
  CUDA_VERSION=13.0 bash install_cusparselt.sh
  # Refresh the dynamic linker cache after the libraries were copied.
  ldconfig
 }
 # Walk the command-line arguments one at a time; each one names a CUDA
 # version whose stack gets installed. An unknown value stops the script.
 until [ $# -eq 0 ]; do
    case "$1" in
    12.4)
        install_124
        ;;
    12.6|12.6.*)
        install_126
        ;;
    12.8|12.8.*)
        install_128
        ;;
    12.9|12.9.*)
        install_129
        ;;
    13.0|13.0.*)
        install_130
        ;;
    *)
        echo "bad argument $1"
        exit 1
        ;;
    esac
    shift
 done
--- a/.ci/docker/common/install_cudss.sh
+++ b/.ci/docker/common/install_cudss.sh
@ -1,25 +0,0 @@
 #!/bin/bash
 # Installs the cuDSS 0.3.0.9 headers and libraries into /usr/local/cuda.
 # The download only happens when CUDA_VERSION starts with 12.1 - 12.4; for
 # any other version only the temp-dir handling and ldconfig run.
 set -ex
 # cudss license: https://docs.nvidia.com/cuda/cudss/license.html
 mkdir tmp_cudss && cd tmp_cudss
 case "${CUDA_VERSION:0:4}" in
    12.[1-4])
        # TARGETARCH defaults to the machine architecture; amd64/x86_64 pick
        # the x86_64 archive, everything else the sbsa one.
        export TARGETARCH=${TARGETARCH:-$(uname -m)}
        case "${TARGETARCH}" in
            amd64|x86_64) arch_path='x86_64' ;;
            *)            arch_path='sbsa' ;;
        esac
        CUDSS_NAME="libcudss-linux-${arch_path}-0.3.0.9_cuda12-archive"
        curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudss/redist/libcudss/linux-${arch_path}/${CUDSS_NAME}.tar.xz
        # only for cuda 12
        tar xf ${CUDSS_NAME}.tar.xz
        cp -a ${CUDSS_NAME}/include/* /usr/local/cuda/include/
        cp -a ${CUDSS_NAME}/lib/* /usr/local/cuda/lib64/
        ;;
 esac
 cd ..
 rm -rf tmp_cudss
 ldconfig
--- a/.ci/docker/common/install_cusparselt.sh
+++ b/.ci/docker/common/install_cusparselt.sh
@ -1,41 +0,0 @@
 #!/bin/bash
 # Downloads the cuSPARSELt redistributable archive that matches CUDA_VERSION
 # and copies its headers and libraries into /usr/local/cuda.
 #
 # Environment:
 #   CUDA_VERSION - selects the cuSPARSELt release:
 #                  13.x -> 0.8.0.4, 12.5-12.9 -> 0.7.1.0, 12.4 -> 0.6.2.3
 #   TARGETARCH   - optional, defaults to `uname -m`; amd64/x86_64 select the
 #                  x86_64 archive, any other value selects sbsa
 # Exits with status 1 when CUDA_VERSION matches none of the known releases.
 set -ex
 # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
 mkdir tmp_cusparselt && cd tmp_cusparselt
 # The architecture does not depend on the CUDA version, so resolve it once
 # instead of repeating the detection in every branch.
 arch_path='sbsa'
 export TARGETARCH=${TARGETARCH:-$(uname -m)}
 if [ "${TARGETARCH}" = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
    arch_path='x86_64'
 fi
 # Match on the major version prefix (same test as install_nccl.sh) rather
 # than on "13" appearing anywhere in the first four characters.
 if [[ ${CUDA_VERSION:0:2} == "13" ]]; then
    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.8.0.4_cuda13-archive"
 elif [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then
    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.7.1.0-archive"
 elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive"
 else
    # Stop here with a clear message; previously execution continued and
    # failed later on `tar xf .tar.xz` because CUSPARSELT_NAME was empty.
    echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}"
    exit 1
 fi
 curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
 tar xf ${CUSPARSELT_NAME}.tar.xz
 cp -a ${CUSPARSELT_NAME}/include/* /usr/local/cuda/include/
 cp -a ${CUSPARSELT_NAME}/lib/* /usr/local/cuda/lib64/
 cd ..
 rm -rf tmp_cusparselt
 ldconfig
--- a/.ci/docker/common/install_docs_reqs.sh
+++ b/.ci/docker/common/install_docs_reqs.sh
@ -1,25 +0,0 @@
 #!/bin/bash
 # Installs the tooling needed for building docs: Node.js, yarn, the katex
 # package (globally, under /usr/local) and doxygen.
 # Everything is skipped when KATEX is empty or unset.
 set -ex
 if [ -n "$KATEX" ]; then
  apt-get update
  # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
  apt-get install -y gpg-agent || :
  # Register the NodeSource 16.x repository, then install nodejs from it.
  curl --retry 3 -sL https://deb.nodesource.com/setup_16.x | sudo -E bash -
  sudo apt-get install -y nodejs
  # Register the yarn apt repository (signing key + sources list entry).
  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
  apt-get update
  apt-get install -y --no-install-recommends yarn
  yarn global add katex --prefix /usr/local
  sudo apt-get -y install doxygen
  # Drop apt caches and temporary files.
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 fi
--- a/.ci/docker/common/install_executorch.sh
+++ b/.ci/docker/common/install_executorch.sh
@ -1,68 +0,0 @@
 #!/bin/bash
 # Clones ExecuTorch at its pinned commit and installs its dependencies.
 # Run without arguments to perform every step; with arguments, they are
 # executed as a command (e.g. the name of one of the functions below).
 set -ex
 # Helpers used below (get_pinned_commit, conda_install, as_jenkins, conda_run,
 # pip_install) are presumably provided by common_utils.sh - confirm there.
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 # Clone the ExecuTorch repository into ./executorch, check out the pinned
 # commit including submodules, and hand ownership of the tree to jenkins.
 clone_executorch() {
  EXECUTORCH_PINNED_COMMIT=$(get_pinned_commit executorch)
  # Clone the Executorch
  git clone https://github.com/pytorch/executorch.git
  # and fetch the target commit
  pushd executorch
  git checkout "${EXECUTORCH_PINNED_COMMIT}"
  git submodule update --init --recursive
  popd
  chown -R jenkins executorch
 }
 # Install buck2 using ExecuTorch's own installer script.
 # BUCK2_VERSION is read from ExecuTorch's pin file; the installer is sourced
 # (not executed), so it runs in this shell from executorch/.ci/docker.
 install_buck2() {
  pushd executorch/.ci/docker
  BUCK2_VERSION=$(cat ci_commit_pins/buck2.txt)
  source common/install_buck.sh
  popd
 }
 # Install the conda packages listed in ExecuTorch's conda-env-ci.txt.
 # The file is referenced relative to executorch/.ci/docker, hence the pushd.
 install_conda_dependencies() {
  pushd executorch/.ci/docker
  # Install conda dependencies like flatbuffer
  conda_install --file conda-env-ci.txt
  popd
 }
 # Run ExecuTorch's install_executorch.sh as jenkins from inside the checkout,
 # then adjust the Python packages for PyTorch CI.
 install_pip_dependencies() {
  pushd executorch
  as_jenkins bash install_executorch.sh
  # A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
  # numba and scipy version used in PyTorch CI
  conda_run pip uninstall -y numba scipy
  # Yaspin is needed for running CI test (get_benchmark_analysis_data.py)
  pip_install yaspin==3.1.0
  popd
 }
 # Run ExecuTorch's Linux setup script with the cmake build tool.
 # Must be called from inside the executorch checkout (the script path is
 # relative). PYTHON_EXECUTABLE and CMAKE_ARGS are exported for that script.
 setup_executorch() {
  export PYTHON_EXECUTABLE=python
  export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON"
  # `|| true` makes a failing setup non-fatal despite `set -e`.
  as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
 }
 # Entry point. Any arguments are run verbatim as a command, which lets a
 # caller invoke a single step; with no arguments the full sequence runs.
 if [ $# -gt 0 ]; then
  "$@"
 else
  clone_executorch
  install_buck2
  install_conda_dependencies
  install_pip_dependencies
  # setup_executorch uses paths relative to the checkout root.
  pushd executorch
  setup_executorch
  popd
 fi
--- a/.ci/docker/common/install_halide.sh
+++ b/.ci/docker/common/install_halide.sh
@ -1,48 +0,0 @@
 #!/bin/bash
 # Builds LLVM (release/16.x) and Halide at its pinned commit from source and
 # installs Halide into the active conda environment.
 # Requires ANACONDA_PYTHON_VERSION; UBUNTU_VERSION enables the apt step.
 # get_pinned_commit and pip_install are presumably provided by
 # common_utils.sh - confirm there.
 set -ex
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 COMMIT=$(get_pinned_commit halide)
 # With `set -e`, a failing `test -n` aborts the script: both the pinned
 # commit and ANACONDA_PYTHON_VERSION must be non-empty.
 test -n "$COMMIT"
 # activate conda to populate CONDA_PREFIX
 test -n "$ANACONDA_PYTHON_VERSION"
 eval "$(conda shell.bash hook)"
 conda activate py_$ANACONDA_PYTHON_VERSION
 if [ -n "${UBUNTU_VERSION}" ];then
    apt update
    apt-get install -y lld liblld-15-dev libpng-dev libjpeg-dev libgl-dev \
                  libopenblas-dev libeigen3-dev libatlas-base-dev libzstd-dev
 fi
 pip_install numpy scipy imageio cmake ninja
 # Build LLVM/clang from source into ./llvm-install (X86 and NVPTX targets).
 git clone --depth 1 --branch release/16.x --recursive https://github.com/llvm/llvm-project.git
 cmake -DCMAKE_BUILD_TYPE=Release \
        -DLLVM_ENABLE_PROJECTS="clang" \
        -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \
        -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_ENABLE_ASSERTIONS=ON \
        -DLLVM_ENABLE_EH=ON -DLLVM_ENABLE_RTTI=ON -DLLVM_BUILD_32_BITS=OFF \
        -S llvm-project/llvm -B llvm-build -G Ninja
 cmake --build llvm-build
 cmake --install llvm-build --prefix llvm-install
 # Exported so the Halide build below can locate the freshly built LLVM.
 export LLVM_ROOT=`pwd`/llvm-install
 export LLVM_CONFIG=$LLVM_ROOT/bin/llvm-config
 git clone https://github.com/halide/Halide.git
 pushd Halide
 git checkout ${COMMIT} && git submodule update --init --recursive
 pip_install -r requirements.txt
 # NOTE: pybind has a requirement for cmake > 3.5 so set the minimum cmake version here with a flag
 #       Context: https://github.com/pytorch/pytorch/issues/150420
 cmake -G Ninja -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release -S . -B build
 cmake --build build
 # Create lib/python3 as a symlink to lib/python<version> unless it already exists.
 test -e ${CONDA_PREFIX}/lib/python3 || ln -s python${ANACONDA_PYTHON_VERSION} ${CONDA_PREFIX}/lib/python3
 cmake --install build --prefix ${CONDA_PREFIX}
 chown -R jenkins ${CONDA_PREFIX}
 popd
 # Remove all source and build trees now that Halide is installed.
 rm -rf Halide llvm-build llvm-project llvm-install
 python -c "import halide"  # check for errors
--- a/.ci/docker/common/install_inductor_benchmark_deps.sh
+++ b/.ci/docker/common/install_inductor_benchmark_deps.sh
@ -1,46 +0,0 @@
 #!/bin/bash
 # Installs the dependencies of the inductor benchmark suites (TorchBench,
 # HuggingFace requirements, timm).
 set -ex
 # pip_install, conda_install, conda_run and get_pinned_commit are presumably
 # provided by common_utils.sh - confirm there.
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 # Install the packages listed in huggingface-requirements.txt, which is
 # looked up relative to the current directory.
 function install_huggingface() {
  pip_install -r huggingface-requirements.txt
 }
 # Install pytorch-image-models (timm) from GitHub at its pinned commit.
 function install_timm() {
  # Declared separately from the assignment so a failing get_pinned_commit
  # is not masked by `local`.
  local commit
  commit=$(get_pinned_commit timm)
  pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
 }
 # Clone pytorch/benchmark into ./torchbench at its pinned commit, run its
 # installer, print the resulting package list, and hand ownership of the
 # checkout and of /opt/conda to jenkins.
 function install_torchbench() {
  local commit
  commit=$(get_pinned_commit torchbench)
  git clone https://github.com/pytorch/benchmark torchbench
  pushd torchbench
  git checkout "$commit"
  python install.py --continue_on_fail
  echo "Print all dependencies after TorchBench is installed"
  python -mpip freeze
  popd
  chown -R jenkins torchbench
  chown -R jenkins /opt/conda
 }
 # Main sequence: install prerequisites, then TorchBench, the HuggingFace
 # requirements and timm, and finally remove the temporary torch packages.
 # Pango is needed for weasyprint which is needed for doctr
 conda_install pango
 # Stable packages are ok here, just to satisfy TorchBench check
 pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
 install_torchbench
 install_huggingface
 install_timm
 # Clean up
 conda_run pip uninstall -y torch torchvision torchaudio triton torchao
--- a/.ci/docker/common/install_libpng.sh
+++ b/.ci/docker/common/install_libpng.sh
@ -1,23 +0,0 @@
 #!/bin/bash
 # Script used only in CD pipeline
 # Downloads the libpng source release, builds it with the default configure
 # options, installs it, and removes the working directory afterwards.
 set -ex
 LIBPNG_VERSION=1.6.37
 # Name shared by the tarball (plus .tar.gz) and the directory it unpacks to.
 libpng_src="libpng-$LIBPNG_VERSION"
 mkdir -p libpng
 pushd libpng
 wget http://download.sourceforge.net/libpng/${libpng_src}.tar.gz
 tar -xvzf ${libpng_src}.tar.gz
 pushd ${libpng_src}
 ./configure
 make
 make install
 popd
 popd
 rm -rf libpng
--- a/.ci/docker/common/install_linter.sh
+++ b/.ci/docker/common/install_linter.sh
@ -1,27 +0,0 @@
 #!/bin/bash
 # Prepares the linter image: installs system packages, initialises lintrunner
 # against a shallow PyTorch clone, caches the resulting .lintbin directory in
 # /tmp and installs markdown-toc.
 set -ex
 # The apt step only runs when UBUNTU_VERSION is set.
 if [ -n "${UBUNTU_VERSION}" ]; then
  apt update
  apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
 fi
 # Do shallow clone of PyTorch so that we can init lintrunner in Docker build context
 git clone https://github.com/pytorch/pytorch.git --depth 1
 chown -R jenkins pytorch
 pushd pytorch
 # Install all linter dependencies
 pip install -r requirements.txt
 lintrunner init
 # Cache .lintbin directory as part of the Docker image
 cp -r .lintbin /tmp
 popd
 # Node dependencies required by toc linter job
 npm install -g markdown-toc
 # Cleaning up
 rm -rf pytorch
--- a/.ci/docker/common/install_magma.sh
+++ b/.ci/docker/common/install_magma.sh
@ -1,27 +0,0 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline
 # Usage: install_magma.sh <cuda major.minor>, e.g. 12.6
 # -e: exit on error, -u: unset variables are errors, -o pipefail: a pipeline
 # fails if any stage fails.
 set -eou pipefail
 # Download the prebuilt MAGMA tarball for the given CUDA version and move its
 # include/ and lib/ directories into /usr/local/cuda-<version>/magma.
 #   $1 - CUDA version as major.minor
 function do_install() {
    cuda_version=$1
    # Remove the first dot: 12.6 -> 126 (matches the tarball naming).
    cuda_version_nodot=${1/./}
    MAGMA_VERSION="2.6.1"
    magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
    cuda_dir="/usr/local/cuda-${cuda_version}"
    # Subshell: command tracing (set -x) and the directory change stay local.
    (
        set -x
        tmp_dir=$(mktemp -d)
        pushd ${tmp_dir}
        curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
        tar -xvf "${magma_archive}"
        mkdir -p "${cuda_dir}/magma"
        mv include "${cuda_dir}/magma/include"
        mv lib "${cuda_dir}/magma/lib"
        popd
    )
 }
 do_install $1
--- a/.ci/docker/common/install_magma_conda.sh
+++ b/.ci/docker/common/install_magma_conda.sh
@ -1,23 +0,0 @@
 #!/usr/bin/env bash
 # Script that installs magma from tarball inside conda environment.
 # It replaces anaconda magma-cuda package which is no longer published.
 # Execute it inside active conda environment.
 # See issue: https://github.com/pytorch/pytorch/issues/138506
 # Usage: install_magma_conda.sh <cuda major.minor>, e.g. 12.6
 set -eou pipefail
 # Remove the first dot: 12.6 -> 126 (matches the tarball naming).
 cuda_version_nodot=${1/./}
 # Target prefix: CONDA_PREFIX when set, otherwise the parent of the directory
 # containing the conda executable.
 anaconda_dir=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
 MAGMA_VERSION="2.6.1"
 magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
 # Subshell: command tracing (set -x) and the directory change stay local.
 (
    set -x
    tmp_dir=$(mktemp -d)
    pushd ${tmp_dir}
    curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
    tar -xvf "${magma_archive}"
    # Merge the extracted headers and libraries into the conda prefix.
    mv include/* "${anaconda_dir}/include/"
    mv lib/* "${anaconda_dir}/lib"
    popd
 )
--- a/.ci/docker/common/install_mingw.sh
+++ b/.ci/docker/common/install_mingw.sh
@ -1,10 +0,0 @@
 #!/bin/bash
 # Installs the MinGW-w64 g++ cross compiler (posix threading variant) from apt.
 set -ex
 # Install MinGW-w64 for Windows cross-compilation
 apt-get update
 apt-get install -y g++-mingw-w64-x86-64-posix
 echo "MinGW-w64 installed successfully"
 # Doubles as a smoke test: with `set -e` the script fails if the compiler
 # cannot be executed.
 x86_64-w64-mingw32-g++ --version
--- a/.ci/docker/common/install_miopen.sh
+++ b/.ci/docker/common/install_miopen.sh
@ -1,129 +0,0 @@
 #!/bin/bash
 # Script used only in CD pipeline
 # Usage: install_miopen.sh <ROCM_VERSION>
 #   ROCM_VERSION is major.minor or major.minor.patch
 set -ex
 ROCM_VERSION=$1
 if [[ -z $ROCM_VERSION ]]; then
    echo "missing ROCM_VERSION"
    exit 1;
 fi
 # Detect the distribution from the ID= line of /etc/os-release (quotes
 # stripped). Only ubuntu, centos and almalinux are accepted.
 IS_UBUNTU=0
 ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 case "$ID" in
  ubuntu)
    IS_UBUNTU=1
    ;;
  centos|almalinux)
    IS_UBUNTU=0
    ;;
  *)
    echo "Unable to determine OS..."
    exit 1
    ;;
 esac
 # To make version comparison easier, create an integer representation.
 # IFS is switched to "." temporarily so the unquoted expansion splits the
 # version string into its numeric components.
 save_IFS="$IFS"
 IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
 IFS="$save_IFS"
 # A missing patch component defaults to 0; other shapes are rejected.
 if [[ ${#ROCM_VERSION_ARRAY[@]} == 2 ]]; then
    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
    ROCM_VERSION_PATCH=0
 elif [[ ${#ROCM_VERSION_ARRAY[@]} == 3 ]]; then
    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
    ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]}
 else
    echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
    exit 1
 fi
 # major*10000 + minor*100 + patch, e.g. 6.2.1 -> 60201
 ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
 # Retry a command that sometimes times out or fails flakily.
 #   $@ - the command and its arguments
 # Runs the command up to five times, sleeping 1, 2, 4 and 8 seconds between
 # attempts, and stops at the first success. The status of the last attempt
 # is returned.
 retry () {
    # "$@" keeps every argument intact; the previous unquoted $* re-split
    # arguments containing whitespace and expanded glob characters in them.
    "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
 }
 # Build custom MIOpen to use comgr for offline compilation.
 ## Need a sanitized ROCM_VERSION without patchlevel; patchlevel version 0 must be added to paths.
 # Count the dots in ROCM_VERSION: one dot means major.minor was given.
 ROCM_DOTS=$(echo ${ROCM_VERSION} | tr -d -c '.' | wc -c)
 if [[ ${ROCM_DOTS} == 1 ]]; then
    ROCM_VERSION_NOPATCH="${ROCM_VERSION}"
    ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}.0"
 else
    ROCM_VERSION_NOPATCH="${ROCM_VERSION%.*}"
    ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}"
 fi
 MIOPEN_CMAKE_COMMON_FLAGS="
 -DMIOPEN_USE_COMGR=ON
 -DMIOPEN_BUILD_DRIVER=OFF
 "
 # Only ROCm 6.2.0 - 6.2.3 is rebuilt from source; every other version exits
 # successfully here without changing anything.
 if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then
    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
 else
    echo "ROCm ${ROCM_VERSION} does not need any patches, do not build from source"
    exit 0
 fi
 # Remove the packaged MIOpen before installing the custom build.
 if [[ ${IS_UBUNTU} == 1 ]]; then
  apt-get remove -y miopen-hip
 else
  # Workaround since almalinux manylinux image already has this and cget doesn't like that
  rm -rf /usr/local/lib/pkgconfig/sqlite3.pc
  # Versioned package name needs regex match
  # Use --noautoremove to prevent other rocm packages from being uninstalled
  yum remove -y miopen-hip* --noautoremove
 fi
 git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
 pushd MIOpen
 # remove .git to save disk space since CI runner was running out
 rm -rf .git
 # Don't build CK to save docker build time
 sed -i '/composable_kernel/d' requirements.txt
 ## MIOpen minimum requirements
 cmake -P install_deps.cmake --minimum
 # clean up since CI runner was running out of disk space
 rm -rf /tmp/*
 if [[ ${IS_UBUNTU} == 1 ]]; then
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 else
  yum clean all
  rm -rf /var/cache/yum
  rm -rf /var/lib/yum/yumdb
  rm -rf /var/lib/yum/history
 fi
 ## Build MIOpen
 mkdir -p build
 cd build
 # NOTE(review): MIOPEN_CMAKE_DB_FLAGS is not assigned anywhere in this
 # script, so it expands to nothing unless the environment provides it.
 PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \
    ${MIOPEN_CMAKE_COMMON_FLAGS} \
    ${MIOPEN_CMAKE_DB_FLAGS} \
    -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}"
 make MIOpen -j $(nproc)
 # Build MIOpen package
 make -j $(nproc) package
 # clean up since CI runner was running out of disk space
 rm -rf /usr/local/cget
 # Install the freshly built package with the distribution's package tool.
 if [[ ${IS_UBUNTU} == 1 ]]; then
  sudo dpkg -i miopen-hip*.deb
 else
  yum install -y miopen-*.rpm
 fi
 popd
 rm -rf MIOpen
--- a/.ci/docker/common/install_mkl.sh
+++ b/.ci/docker/common/install_mkl.sh
@ -1,16 +0,0 @@
 #!/bin/bash
 # Downloads the mkl-static wheel with pip, unpacks the mkl_static and
 # mkl_include wheels and moves their lib/ and include/ directories into
 # /opt/intel.
 set -ex
 # MKL
 MKL_VERSION=2024.2.0
 MKLROOT=/opt/intel
 mkdir -p ${MKLROOT}
 # NOTE: there is no matching popd; the script ends with /tmp as the cwd.
 pushd /tmp
 python3 -mpip install wheel
 # Presumably mkl-include arrives as a dependency of mkl-static, since its
 # wheel is unpacked below without a separate download - confirm.
 python3 -mpip download -d . mkl-static==${MKL_VERSION}
 # The wheel file names hard-code the manylinux1_x86_64 platform tag.
 python3 -m wheel unpack mkl_static-${MKL_VERSION}-py2.py3-none-manylinux1_x86_64.whl
 python3 -m wheel unpack mkl_include-${MKL_VERSION}-py2.py3-none-manylinux1_x86_64.whl
 mv mkl_static-${MKL_VERSION}/mkl_static-${MKL_VERSION}.data/data/lib ${MKLROOT}
 mv mkl_include-${MKL_VERSION}/mkl_include-${MKL_VERSION}.data/data/include ${MKLROOT}
--- a/.ci/docker/common/install_mnist.sh
+++ b/.ci/docker/common/install_mnist.sh
@ -1,13 +0,0 @@
 #!/bin/bash
 # Script used only in CD pipeline
 # Fetches the four MNIST archives into /usr/local/mnist and decompresses them
 # in place.
 set -ex
 mkdir -p /usr/local/mnist/
 cd /usr/local/mnist
 mnist_archives=(
  train-images-idx3-ubyte.gz
  train-labels-idx1-ubyte.gz
  t10k-images-idx3-ubyte.gz
  t10k-labels-idx1-ubyte.gz
 )
 for archive in "${mnist_archives[@]}"; do
  wget -q https://ossci-datasets.s3.amazonaws.com/mnist/$archive
  gzip -d $archive
 done
--- a/.ci/docker/common/install_nccl.sh
+++ b/.ci/docker/common/install_nccl.sh
@ -1,28 +0,0 @@
 #!/bin/bash
 # Builds NCCL from source at the tag pinned for the CUDA major version and
 # copies the resulting headers and libraries into /usr/local/cuda.
 # The pin files are read relative to the current directory.
 set -ex
 NCCL_VERSION=""
 # The first two characters of CUDA_VERSION select the pin file; any major
 # version other than 11, 12 or 13 is rejected.
 case "${CUDA_VERSION:0:2}" in
  11)
    NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
    ;;
  12)
    NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
    ;;
  13)
    NCCL_VERSION=$(cat ci_commit_pins/nccl-cu13.txt)
    ;;
  *)
    echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
    exit 1
    ;;
 esac
 # An empty pin file means there is nothing to build.
 if [[ -n "${NCCL_VERSION}" ]]; then
  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
  # Build steps follow https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
  pushd nccl
  make -j src.build
  cp -a build/include/* /usr/local/cuda/include/
  cp -a build/lib/* /usr/local/cuda/lib64/
  popd
  rm -rf nccl
  ldconfig
 fi
--- a/.ci/docker/common/install_ninja.sh
+++ b/.ci/docker/common/install_ninja.sh
@ -1,18 +0,0 @@
 #!/bin/bash
 # Downloads the ninja release binary for NINJA_VERSION and unpacks it into
 # /usr/local/bin.
 set -ex
 # NINJA_VERSION is mandatory: with `set -e` an empty value aborts right here.
 test -n "$NINJA_VERSION"
 arch=$(uname -m)
 release_base="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}"
 # aarch64 has its own archive; every other architecture gets the default one.
 case "$arch" in
    aarch64)
        url="${release_base}/ninja-linux-aarch64.zip"
        ;;
    *)
        url="${release_base}/ninja-linux.zip"
        ;;
 esac
 pushd /tmp
 wget --no-verbose --output-document=ninja-linux.zip "$url"
 unzip ninja-linux.zip -d /usr/local/bin
 rm -f ninja-linux.zip
 popd
--- a/.ci/docker/common/install_nvpl.sh
+++ b/.ci/docker/common/install_nvpl.sh
@ -1,20 +0,0 @@
 #!/bin/bash
 set -ex
 # Fetch the NVPL BLAS and LAPACK redistributable archives (linux-sbsa) and
 # copy their lib/ and include/ contents into /opt/nvpl.
 function install_nvpl {
    mkdir -p /opt/nvpl/lib /opt/nvpl/include
    local spec lib_name lib_version archive
    # Each entry is <library>=<version>; BLAS is handled before LAPACK.
    for spec in nvpl_blas=0.3.0 nvpl_lapack=0.2.3.1; do
        lib_name=${spec%%=*}
        lib_version=${spec#*=}
        archive="${lib_name}-linux-sbsa-${lib_version}-archive"
        wget https://developer.download.nvidia.com/compute/nvpl/redist/${lib_name}/linux-sbsa/${archive}.tar.xz
        tar xf ${archive}.tar.xz
        cp -r ${archive}/lib/* /opt/nvpl/lib/
        cp -r ${archive}/include/* /opt/nvpl/include/
    done
 }
 install_nvpl
--- a/.ci/docker/common/install_onnx.sh
+++ b/.ci/docker/common/install_onnx.sh
@ -1,38 +0,0 @@
 #!/bin/bash
 # Installs the Python packages needed by the ONNX tests and pre-downloads a
 # small transformers model into the cache.
 set -ex
 # pip_install, conda_run and as_jenkins are presumably provided by
 # common_utils.sh - confirm there.
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 # Run a command, retrying up to three more times after pauses of 10, 20 and
 # 40 seconds. Stops at the first success; otherwise the status of the final
 # attempt is returned. Retries run in a subshell, the first attempt does not.
 retry () {
    "$@" && return 0
    local pause rc
    for pause in 10 20 40; do
        (sleep "$pause" && "$@") && return 0
        rc=$?
    done
    return "$rc"
 }
 # ONNXRuntime should be installed before installing
 # onnx-weekly. Otherwise, onnx-weekly could be
 # overwritten by onnx.
 pip_install \
  parameterized==0.8.1 \
  pytest-cov==4.0.0 \
  pytest-subtests==0.10.0 \
  tabulate==0.9.0 \
  transformers==4.36.2
 pip_install coloredlogs packaging
 pip_install onnxruntime==1.23.0
 pip_install onnxscript==0.5.4
 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
 IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
 # NOTE: the output redirection is performed by the current shell, not by the
 # command run through as_jenkins.
 as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
 # Need a PyTorch version for transformers to work
 pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
 # Very weird quoting behavior here https://github.com/conda/conda/issues/10972,
 # so echo the command to a file and run the file instead
 conda_run python "${IMPORT_SCRIPT_FILENAME}"
 # Cleaning up
 conda_run pip uninstall -y torch
 # `|| true`: a failure to delete the helper script is not fatal.
 rm "${IMPORT_SCRIPT_FILENAME}" || true
--- a/.ci/docker/common/install_openblas.sh
+++ b/.ci/docker/common/install_openblas.sh
@ -1,25 +0,0 @@
 #!/bin/bash
 # CD-pipeline-only helper: build OpenBLAS from source and install it.
 set -ex
 OPENBLAS_VERSION=${OPENBLAS_VERSION:-"v0.3.30"}
 # Directory created by the clone below (git names it after the repository).
 OPENBLAS_CHECKOUT_DIR="OpenBLAS"
 # Variables handed to make, one per array element.
 OPENBLAS_BUILD_FLAGS=(
   NUM_THREADS=128
   USE_OPENMP=1
   NO_SHARED=0
   DYNAMIC_ARCH=1
   TARGET=ARMV8
   CFLAGS=-O3
   BUILD_BFLOAT16=1
 )
 # Shallow clone of just the requested branch/tag.
 git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" --depth 1 --shallow-submodules
 make -j8 "${OPENBLAS_BUILD_FLAGS[@]}" -C "${OPENBLAS_CHECKOUT_DIR}"
 sudo make install -C "${OPENBLAS_CHECKOUT_DIR}"
 # The sources are not needed once the install step has run.
 rm -rf "${OPENBLAS_CHECKOUT_DIR}"
--- a/.ci/docker/common/install_openmpi.sh
+++ b/.ci/docker/common/install_openmpi.sh
@ -1,10 +0,0 @@
 #!/bin/bash
 # Install the OpenMPI runtime and development packages (plus ssh) via apt.
 # Abort on the first failing command and trace commands, like the sibling
 # install scripts; previously a failed "apt-get update" or ssh install was
 # silently ignored because only the last command's status was reported.
 set -ex
 sudo apt-get update
 # also install ssh to avoid error of:
 # --------------------------------------------------------------------------
 # The value of the MCA parameter "plm_rsh_agent" was set to a path
 # that could not be found:
 #   plm_rsh_agent: ssh : rsh
 sudo apt-get install -y ssh
 sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
--- a/.ci/docker/common/install_openssl.sh
+++ b/.ci/docker/common/install_openssl.sh
@ -1,17 +0,0 @@
 #!/bin/bash
 # Build OpenSSL from the mirrored source tarball, install it under
 # /opt/openssl and symlink its libraries into /usr/lib.
 set -ex
 OPENSSL=openssl-1.1.1k
 wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
 tar xf "${OPENSSL}.tar.gz"
 cd "${OPENSSL}"
 # $(LIBRPATH) is single-quoted so the shell passes it through literally
 # (presumably it is resolved later by OpenSSL's own build files).
 ./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
 # Leave two cores free, but never drop below one job: on hosts with one or
 # two CPUs the old computation produced "-j-1" / "-j0", which make rejects.
 NPROC=$(( $(nproc) - 2 ))
 if (( NPROC < 1 )); then
   NPROC=1
 fi
 make -j"${NPROC}"
 # NOTE: openssl install errors out when built with the -j option
 make install_sw
 # Link the ssl libraries to the /usr/lib folder.
 sudo ln -s /opt/openssl/lib/lib* /usr/lib
 cd ..
 rm -rf "${OPENSSL}"
--- a/.ci/docker/common/install_patchelf.sh
+++ b/.ci/docker/common/install_patchelf.sh
@ -1,16 +0,0 @@
 #!/bin/bash
 # CD-pipeline-only helper: build and install patchelf from source.
 set -ex
 # Stay on release 0.17.2; newer commits fail to build on the current image.
 git clone --single-branch -b 0.17.2 https://github.com/NixOS/patchelf
 # Rewrite every "serial" to "parallel" in configure.ac before bootstrapping.
 sed -i 's/serial/parallel/g' patchelf/configure.ac
 # Build inside a subshell so the working directory is restored afterwards.
 (
   cd patchelf
   ./bootstrap.sh
   ./configure
   make
   make install
 )
 rm -rf patchelf
--- a/.ci/docker/common/install_python.sh
+++ b/.ci/docker/common/install_python.sh
@ -1,15 +0,0 @@
 #!/bin/bash
 # Install the requested system Python (PYTHON_VERSION), make it the default
 # "python", and create the CI virtual environment with the CI requirements.
 set -ex
 apt-get update
 # Use deadsnakes in case we need an older python version.
 # -y answers the confirmation prompt so an unattended image build cannot
 # stall or abort on it (same flag as used in install_triton.sh).
 sudo add-apt-repository -y ppa:deadsnakes/ppa
 apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv
 # Use a venv because uv and some other package managers don't support --user install
 ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
 python -m venv /var/lib/jenkins/ci_env
 source /var/lib/jenkins/ci_env/bin/activate
 python -mpip install --upgrade pip
 python -mpip install -r /opt/requirements-ci.txt
--- a/.ci/docker/common/install_rocm.sh
+++ b/.ci/docker/common/install_rocm.sh
@ -1,202 +0,0 @@
 #!/bin/bash
 set -ex
 # for pip_install function
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 # Revision of composable_kernel to install, read from the pin file that sits
 # in ../ci_commit_pins relative to this script's directory.
 ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)"
 ver() {
    # Encode a dotted version string ("6.4.2", "22.04") as a fixed-width
    # number so versions can be compared with [[ $(ver A) -ge $(ver B) ]].
    # Each component fills a 3-digit field; missing components count as 0.
    local component
    local -a fields=()
    # Intentionally unquoted: split the version on its dots.
    for component in ${1//./ }; do
        # Force base 10 so zero-padded components such as "08" or "09" are
        # not rejected by printf as invalid octal numbers.
        fields+=("$((10#${component}))")
    done
    printf "%3d%03d%03d%03d" "${fields[@]}"
 }
 # Install ROCm on an Ubuntu image: configure the repo.radeon.com apt sources,
 # install the ROCm packages and the precompiled MIOpen kernel packages,
 # rebuild libamdhip64 from source for ROCm 6.4.x, and pip-install
 # composable_kernel at the pinned revision.
 # Reads UBUNTU_VERSION, ROCM_VERSION and ANACONDA_PYTHON_VERSION from the
 # environment; ROCM_VERSION may be rewritten (6.4 -> 6.4.2).
 install_ubuntu() {
    apt-get update
    # gpg-agent is not available by default
    apt-get install -y --no-install-recommends gpg-agent
    # On 22.04+ the apt pin is written here via tee; the same file is written
    # again unconditionally by the heredoc further down.
    if [[ $(ver $UBUNTU_VERSION) -ge $(ver 22.04) ]]; then
        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
            | sudo tee /etc/apt/preferences.d/rocm-pin-600
    fi
    apt-get install -y kmod
    apt-get install -y wget
    # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
    apt-get install -y libc++1
    apt-get install -y libc++abi1
    # Make sure rocm packages from repo.radeon.com have highest priority
    cat << EOF > /etc/apt/preferences.d/rocm-pin-600
 Package: *
 Pin: release o=repo.radeon.com
 Pin-Priority: 600
 EOF
    # we want the patch version of 6.4 instead
    if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
        ROCM_VERSION="${ROCM_VERSION}.2"
    fi
    # Default url values
    rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
    amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
    # Add amdgpu repository
    UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
    echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
    # Add rocm repository
    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
    echo "deb [arch=amd64] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/rocm.list
    # Both the index update and the installs below are run with repository
    # authentication relaxed (--allow-insecure-repositories / --allow-unauthenticated).
    apt-get update --allow-insecure-repositories
    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
                   rocm-dev \
                   rocm-utils \
                   rocm-libs \
                   rccl \
                   rocprofiler-dev \
                   roctracer-dev \
                   amd-smi-lib
    # rocm-llvm-dev is only requested for ROCm 6.1 and newer.
    if [[ $(ver $ROCM_VERSION) -ge $(ver 6.1) ]]; then
        DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated rocm-llvm-dev
    fi
    # precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
    # search for all unversioned packages
    # if search fails it will abort this script; use true to avoid case where search fails
    # (grep -F -v . drops every package name that contains a dot.)
    MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
    if [[ "x${MIOPENHIPGFX}" = x ]]; then
      echo "miopen-hip-gfx package not available" && exit 1
    else
      DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
    fi
    # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
    for kdb in /opt/rocm/share/miopen/db/*.kdb
    do
        sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
    done
    # ROCm 6.3 had a regression where initializing static code objects had significant overhead
    # CI no longer builds for ROCm 6.3, but
    # ROCm 6.4 did not yet fix the regression, also HIP branch names are different
    if [[ $(ver $ROCM_VERSION) -ge $(ver 6.4) ]] && [[ $(ver $ROCM_VERSION) -lt $(ver 7.0) ]]; then
        # Select the HIP tag and the clr commit matching the exact 6.4.x version.
        # NOTE(review): any other 6.x version >= 6.4 leaves HIP_TAG and CLR_HASH
        # unset before the clone/checkout below - confirm that cannot happen.
        if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.2) ]]; then
            HIP_TAG=rocm-6.4.2
            CLR_HASH=74d78ba3ac4bac235d02bcb48511c30b5cfdd457  # branch release/rocm-rel-6.4.2-statco-hotfix
        elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.1) ]]; then
            HIP_TAG=rocm-6.4.1
            CLR_HASH=efe6c35790b9206923bfeed1209902feff37f386  # branch release/rocm-rel-6.4.1-statco-hotfix
        elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
            HIP_TAG=rocm-6.4.0
            CLR_HASH=600f5b0d2baed94d5121e2174a9de0851b040b0c  # branch release/rocm-rel-6.4-statco-hotfix
        fi
        # clr build needs CppHeaderParser but can only find it using conda's python
        python -m pip install CppHeaderParser
        git clone https://github.com/ROCm/HIP -b $HIP_TAG
        HIP_COMMON_DIR=$(readlink -f HIP)
        git clone https://github.com/jeffdaily/clr
        pushd clr
        git checkout $CLR_HASH
        popd
        mkdir -p clr/build
        pushd clr/build
        # Need to point CMake to the correct python installation to find CppHeaderParser
        cmake .. -DPython3_EXECUTABLE=/opt/conda/envs/py_${ANACONDA_PYTHON_VERSION}/bin/python3 -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
        make -j
        # Replace the installed HIP runtime library with the freshly built one.
        # NOTE(review): the destination is a glob as well; this only overwrites
        # the installed file if it expands to exactly one existing path - confirm.
        cp hipamd/lib/libamdhip64.so.6.4.* /opt/rocm/lib/libamdhip64.so.6.4.*
        popd
        rm -rf HIP clr
    fi
    pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
    # Cleanup
    apt-get autoclean && apt-get clean
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 }
 install_centos() {
   # Install ROCm on a CentOS/RHEL-style image: register the amdgpu and ROCm
   # yum repositories, install the ROCm packages and the precompiled MIOpen
   # kernel databases, then pip-install composable_kernel at the pinned revision.
   # Reads ROCM_VERSION and OS_VERSION from the environment.
   local pkg
   yum update -y
   for pkg in kmod wget openblas-devel epel-release; do
     yum install -y "${pkg}"
   done
   yum install -y dkms "kernel-headers-$(uname -r)" "kernel-devel-$(uname -r)"

   # amdgpu repository: OS_VERSION 9 uses the rhel/9.0 tree, anything else rhel/7.9.
   local amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
   if [[ $OS_VERSION == 9 ]]; then
     amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.0/main/x86_64"
   fi
   {
     echo "[AMDGPU]"
     echo "name=AMDGPU"
     echo "baseurl=${amdgpu_baseurl}"
     echo "enabled=1"
     echo "gpgcheck=1"
     echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key"
   } > /etc/yum.repos.d/amdgpu.repo

   # ROCm repository.
   local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
   {
     echo "[ROCm]"
     echo "name=ROCm"
     echo "baseurl=${rocm_baseurl}"
     echo "enabled=1"
     echo "gpgcheck=1"
     echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key"
   } > /etc/yum.repos.d/rocm.repo

   yum update -y
   yum install -y rocm-dev rocm-utils rocm-libs rccl rocprofiler-dev roctracer-dev amd-smi-lib

   # Precompiled MIOpen kernels: collect the matching package names. The
   # trailing "|| true" keeps an empty search result from aborting the script
   # so that the explicit check below can report it.
   MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
   if [[ -z "${MIOPENHIPGFX}" ]]; then
     echo "miopen-hip-gfx package not available"
     exit 1
   fi
   yum install -y ${MIOPENHIPGFX}

   # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files,
   # resulting in permission errors at runtime.
   for kdb in /opt/rocm/share/miopen/db/*.kdb; do
     sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
   done

   pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"

   # Cleanup
   yum clean all
   rm -rf /var/cache/yum /var/lib/yum/yumdb /var/lib/yum/history
 }
 # Run the ROCm installer that matches the distribution named by the ID field
 # of /etc/os-release; any other distribution is an error.
 ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 if [[ "$ID" == "ubuntu" ]]; then
   install_ubuntu
 elif [[ "$ID" == "centos" ]]; then
   install_centos
 else
   echo "Unable to determine OS..."
   exit 1
 fi
--- a/.ci/docker/common/install_rocm_drm.sh
+++ b/.ci/docker/common/install_rocm_drm.sh
@ -1,150 +0,0 @@
 #!/bin/bash
 # Script used only in CD pipeline
 ###########################
 ### prereqs
 ###########################
 # Install the libpciaccess headers and pkg-config with the package manager of
 # the distribution named by the ID field of /etc/os-release.
 ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 if [[ "$ID" == "ubuntu" ]]; then
   apt-get update -y
   apt-get install -y libpciaccess-dev pkg-config
   apt-get clean
 elif [[ "$ID" == "centos" || "$ID" == "almalinux" ]]; then
   yum install -y libpciaccess-devel pkgconfig
 else
   echo "Unable to determine OS..."
   exit 1
 fi
 # Build tools used by the build step at the end of this script.
 python3 -m pip install meson ninja
 ###########################
 ### clone repo
 ###########################
 # TLS certificate verification is switched off for this clone only.
 GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
 pushd drm
 ###########################
 ### patch
 ###########################
 # The inline patch below changes amdgpu_parse_asic_ids() in
 # amdgpu/amdgpu_asic_id.c: when the compiled-in AMDGPU_ASIC_ID_TABLE path
 # cannot be opened, it takes the /proc/self/exe path, strips its last two
 # components, and walks that directory with nftw() for a file whose path
 # contains "amdgpu.ids". The "No such file" message is silenced, and the
 # remaining messages report whichever path was actually opened.
 # The heredoc delimiter is quoted, so the patch text is passed to patch(1)
 # without any shell expansion.
 patch -p1 <<'EOF'
 diff --git a/amdgpu/amdgpu_asic_id.c b/amdgpu/amdgpu_asic_id.c
 index a5007ffc..13fa07fc 100644
 --- a/amdgpu/amdgpu_asic_id.c
 +++ b/amdgpu/amdgpu_asic_id.c
@@ -22,6 +22,13 @@
  *
  */
 +#define _XOPEN_SOURCE 700
 +#define _LARGEFILE64_SOURCE
 +#define _FILE_OFFSET_BITS 64
 +#include <ftw.h>
 +#include <link.h>
 +#include <limits.h>
 +
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -34,6 +41,19 @@
 #include "amdgpu_drm.h"
 #include "amdgpu_internal.h"
 +static char *amdgpuids_path = NULL;
 +static const char* amdgpuids_path_msg = NULL;
 +
 +static int check_for_location_of_amdgpuids(const char *filepath, const struct stat *info, const int typeflag, struct FTW *pathinfo)
 +{
 +	if (typeflag == FTW_F && strstr(filepath, "amdgpu.ids")) {
 +		amdgpuids_path = strdup(filepath);
 +		return 1;
 +	}
 +
 +	return 0;
 +}
 +
 static int parse_one_line(struct amdgpu_device *dev, const char *line)
 {
 	char *buf, *saveptr;
@@ -113,10 +133,46 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
 	int line_num = 1;
 	int r = 0;
 +	// attempt to find typical location for amdgpu.ids file
 	fp = fopen(AMDGPU_ASIC_ID_TABLE, "r");
 +
 +	// if it doesn't exist, search
 +	if (!fp) {
 +
 +	char self_path[ PATH_MAX ];
 +	ssize_t count;
 +	ssize_t i;
 +
 +	count = readlink( "/proc/self/exe", self_path, PATH_MAX );
 +	if (count > 0) {
 +		self_path[count] = '\0';
 +
 +		// remove '/bin/python' from self_path
 +		for (i=count; i>0; --i) {
 +			if (self_path[i] == '/') break;
 +			self_path[i] = '\0';
 +		}
 +		self_path[i] = '\0';
 +		for (; i>0; --i) {
 +			if (self_path[i] == '/') break;
 +			self_path[i] = '\0';
 +		}
 +		self_path[i] = '\0';
 +
 +		if (1 == nftw(self_path, check_for_location_of_amdgpuids, 5, FTW_PHYS)) {
 +			fp = fopen(amdgpuids_path, "r");
 +			amdgpuids_path_msg = amdgpuids_path;
 +		}
 +	}
 +
 +	}
 +	else {
 +		amdgpuids_path_msg = AMDGPU_ASIC_ID_TABLE;
 +	}
 +
 +	// both hard-coded location and search have failed
 	if (!fp) {
 -		fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
 -			strerror(errno));
 +		//fprintf(stderr, "amdgpu.ids: No such file or directory\n");
 		return;
 	}
@@ -132,7 +188,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
 			continue;
 		}
 -		drmMsg("%s version: %s\n", AMDGPU_ASIC_ID_TABLE, line);
 +		drmMsg("%s version: %s\n", amdgpuids_path_msg, line);
 		break;
 	}
@@ -150,7 +206,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
 	if (r == -EINVAL) {
 		fprintf(stderr, "Invalid format: %s: line %d: %s\n",
 -			AMDGPU_ASIC_ID_TABLE, line_num, line);
 +			amdgpuids_path_msg, line_num, line);
 	} else if (r && r != -EAGAIN) {
 		fprintf(stderr, "%s: Cannot parse ASIC IDs: %s\n",
 			__func__, strerror(-r));
 EOF
 ###########################
 ### build
 ###########################
 # Configure a meson build directory with /opt/amdgpu as the install prefix,
 # then build and install from inside it with ninja.
 meson builddir --prefix=/opt/amdgpu
 pushd builddir
 ninja install
 # First popd leaves builddir; the second undoes the earlier "pushd drm".
 popd
 popd
--- a/.ci/docker/common/install_rocm_magma.sh
+++ b/.ci/docker/common/install_rocm_magma.sh
@ -1,37 +0,0 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline
 set -eou pipefail
 # Download the prebuilt MAGMA archive matching the given ROCm version and
 # unpack its include/ and lib/ directories under /opt/rocm/magma.
 #   $1 - ROCm version, "X.Y" or "X.Y.Z" (a patch component is dropped).
 # A missing or unreadable archive is reported and skipped, not treated as fatal.
 function do_install() {
    rocm_version=$1
    if [[ ${rocm_version} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        # chop off any patch version
        rocm_version="${rocm_version%.*}"
    fi
    rocm_version_nodot=${rocm_version//./}
    # https://github.com/icl-utk-edu/magma/pull/65
    MAGMA_VERSION=d6e4117bc88e73f06d26c6c2e14f064e8fc3d1ec
    magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
    rocm_dir="/opt/rocm"
    # Subshell: the command tracing and directory changes stay local to it.
    (
        set -x
        tmp_dir=$(mktemp -d)
        pushd "${tmp_dir}"
        curl -OLs "https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}"
        # curl runs without --fail, so a missing archive shows up as a tar error.
        if tar -xvf "${magma_archive}"
        then
            mkdir -p "${rocm_dir}/magma"
            mv include "${rocm_dir}/magma/include"
            mv lib "${rocm_dir}/magma/lib"
        else
            echo "${magma_archive} not found, skipping magma install"
        fi
        popd
        # Remove the scratch directory (downloaded archive and any leftovers)
        # so it does not linger in the image.
        rm -rf "${tmp_dir}"
    )
 }
 do_install "$1"
--- a/.ci/docker/common/install_triton.sh
+++ b/.ci/docker/common/install_triton.sh
@ -1,107 +0,0 @@
 #!/bin/bash
 # Build a Triton wheel from the pinned commit and install it.
 set -ex
 # /opt/triton is created before the early exit below, so it exists even when
 # no Triton build is requested.
 mkdir -p /opt/triton
 # Nothing to build unless TRITON or TRITON_CPU is set; this is not an error.
 if [ -z "${TRITON}" ] && [ -z "${TRITON_CPU}" ]; then
  echo "TRITON and TRITON_CPU are not set. Exiting..."
  exit 0
 fi
 source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
 get_pip_version() {
  # Print the second column (the version) of the first `pip list` row that
  # contains the given package name as a whole word; prints nothing if no row matches.
  conda_run pip list | grep -w $* | awk 'NR == 1 { print $2; exit }'
 }
 # Pick the Triton flavour: the XPU backend when XPU_VERSION is set, otherwise
 # triton-cpu when TRITON_CPU is set, otherwise upstream Triton.
 # TRITON_TEXT_FILE names the pin looked up by get_pinned_commit below.
 if [ -n "${XPU_VERSION}" ]; then
  TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
  TRITON_TEXT_FILE="triton-xpu"
 elif [ -n "${TRITON_CPU}" ]; then
  TRITON_REPO="https://github.com/triton-lang/triton-cpu"
  TRITON_TEXT_FILE="triton-cpu"
 else
  TRITON_REPO="https://github.com/triton-lang/triton"
  TRITON_TEXT_FILE="triton"
 fi
 # The logic here is copied from .ci/pytorch/common_utils.sh
 TRITON_PINNED_COMMIT=$(get_pinned_commit ${TRITON_TEXT_FILE})
 if [ -n "${UBUNTU_VERSION}" ];then
    apt update
    apt-get install -y gpg-agent
 fi
 # Keep the current cmake and numpy version here, so we can reinstall them later
 CMAKE_VERSION=$(get_pip_version cmake)
 NUMPY_VERSION=$(get_pip_version numpy)
 # Default the build parallelism to the number of available processors.
 if [ -z "${MAX_JOBS}" ]; then
    export MAX_JOBS=$(nproc)
 fi
 # Git checkout triton
 # The directory is created and handed to jenkins first; the clone below then
 # runs as jenkins into that (still empty) directory.
 mkdir /var/lib/jenkins/triton
 chown -R jenkins /var/lib/jenkins/triton
 chgrp -R jenkins /var/lib/jenkins/triton
 pushd /var/lib/jenkins/
 as_jenkins git clone --recursive ${TRITON_REPO} triton
 cd triton
 as_jenkins git checkout ${TRITON_PINNED_COMMIT}
 as_jenkins git submodule update --init --recursive
 # Older Triton checkouts keep setup.py in ./python; newer ones have it at the repository root
 if [ ! -f setup.py ]; then
  cd python
 fi
 pip_install pybind11==3.0.1
 # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
 # Rewrites the LLVM download host in setup.py (tritonlang -> oaitriton/public).
 as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
 # Build the wheel; on Ubuntu with gcc 7 or with a clang toolchain, g++-9 is
 # installed and used as the C++ compiler instead.
 if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" == "7" ]]; then
  # Triton needs at least gcc-9 to build
  apt-get install -y g++-9
  CXX=g++-9 conda_run python -m build --wheel --no-isolation
 elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
  # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
  add-apt-repository -y ppa:ubuntu-toolchain-r/test
  apt-get install -y g++-9
  CXX=g++-9 conda_run python -m build --wheel --no-isolation
 else
  conda_run python -m build --wheel --no-isolation
 fi
 # Copy the wheel to /opt for multi stage docker builds
 cp dist/*.whl /opt/triton
 # Install the wheel for docker builds that don't use multi stage
 pip_install dist/*.whl
 # TODO: This is to make sure that the same cmake and numpy version from install conda
 # script is used. Without this step, the newer cmake version (3.25.2) downloaded by
 # triton build step via pip will fail to detect conda MKL. Once that issue is fixed,
 # this can be removed.
 #
 # The correct numpy version also needs to be set here because conda claims that it
 # causes inconsistent environment.  Without this, conda will attempt to install the
 # latest numpy version, which fails ASAN tests with the following import error: Numba
 # needs NumPy 1.20 or less.
 # Note that we install numpy with pip as conda might not have the version we want
 # Each reinstall is skipped when the package was not installed beforehand
 # (its recorded version is empty).
 if [ -n "${CMAKE_VERSION}" ]; then
  pip_install "cmake==${CMAKE_VERSION}"
 fi
 if [ -n "${NUMPY_VERSION}" ]; then
  pip_install "numpy==${NUMPY_VERSION}"
 fi
 # IMPORTANT: helion needs to be installed without dependencies.
 # It depends on torch and triton. We don't want to install
 # triton and torch from production on Docker CI images
 # helion is skipped when ANACONDA_PYTHON_VERSION starts with 3.9.
 if [[ "$ANACONDA_PYTHON_VERSION" != 3.9* ]]; then
  pip_install helion --no-deps
 fi
--- a/.ci/docker/common/install_ucc.sh
+++ b/.ci/docker/common/install_ucc.sh
@ -1,81 +0,0 @@
 #!/bin/bash
 set -ex
 # Values later passed to --with-cuda / --with-rocm: the toolkit directory
 # when it exists on this image, otherwise the literal "no".
 with_cuda=no
 if [[ -d "/usr/local/cuda/" ]]; then
  with_cuda=/usr/local/cuda/
 fi
 with_rocm=no
 if [[ -d "/opt/rocm" ]]; then
  with_rocm=/opt/rocm
 fi
 function install_ucx() {
  # Clone UCX, check out ${UCX_COMMIT}, build it with multi-threading,
  # profiling and statistics enabled, and install it under $UCX_HOME.
  set -ex
  local -a ucx_configure_args=(
    --prefix=$UCX_HOME
    --enable-mt
    --with-cuda=$with_cuda
    --with-rocm=$with_rocm
    --enable-profiling
    --enable-stats
  )
  git clone --recursive https://github.com/openucx/ucx.git
  pushd ucx
  git checkout ${UCX_COMMIT}
  git submodule update --init --recursive
  ./autogen.sh
  ./configure "${ucx_configure_args[@]}"
  time make -j
  sudo make install
  popd
  # The source tree is not needed after installation.
  rm -rf ucx
 }
 function install_ucc() {
  # Clone UCC, check out ${UCC_COMMIT}, configure it against the UCX install
  # in $UCX_HOME with the CUDA/ROCm settings detected above, then build and
  # install it under $UCC_HOME.
  set -ex
  git clone --recursive https://github.com/openucx/ucc.git
  pushd ucc
  git checkout ${UCC_COMMIT}
  git submodule update --init --recursive
  ./autogen.sh

  # CUDA code generation targets.
  # We only run distributed tests on Tesla M60 and A10G, hence the default of
  # sm_52 plus compute_86; CUDA 13.x builds request compute_86 only.
  NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
  if [[ $CUDA_VERSION == 13* ]]; then
    NVCC_GENCODE="-gencode=arch=compute_86,code=compute_86"
  fi

  # ROCm offload architectures.
  if [[ -z "$ROCM_VERSION" ]]; then
    HIP_OFFLOAD="all-arch-no-native"
  else
    # Prefer the explicit ';'-separated list; otherwise ask the machine.
    if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
      amdgpu_targets=$(echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g')
    else
      amdgpu_targets=$(rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs)
    fi
    for arch in $amdgpu_targets; do
      HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
    done
  fi

  local -a ucc_configure_args=(
    --prefix=$UCC_HOME
    --with-ucx=$UCX_HOME
    --with-cuda=$with_cuda
    --with-nvcc-gencode="${NVCC_GENCODE}"
    --with-rocm=$with_rocm
    --with-rocm-arch="${HIP_OFFLOAD}"
  )
  ./configure "${ucc_configure_args[@]}"
  time make -j
  sudo make install
  popd
  rm -rf ucc
 }
 # UCX goes first: install_ucc points its configure step at $UCX_HOME via --with-ucx.
 install_ucx
 install_ucc
--- a/.ci/docker/common/install_user.sh
+++ b/.ci/docker/common/install_user.sh
@ -1,40 +0,0 @@
 #!/bin/bash
 # Create the "jenkins" user (uid/gid 1000) inside the image by appending to
 # the account files directly, give it a home directory, ownership of
 # /usr/local and passwordless sudo, then verify that sudo works for it.
 set -ex
 # Since version 24 the system ships with user 'ubuntu' that has id 1000
 # We need a work-around to enable id 1000 usage for this script
 if [[ $UBUNTU_VERSION == 24.04 ]]; then
    # touch is used to disable harmless error message
    touch /var/mail/ubuntu && chown ubuntu /var/mail/ubuntu && userdel -r ubuntu
 fi
 # Mirror jenkins user in container
 # jenkins user as ec2-user should have the same user-id
 echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
 echo "jenkins:x:1000:" >> /etc/group
 # Needed on focal or newer
 echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow
 # Create $HOME
 mkdir -p /var/lib/jenkins
 chown jenkins:jenkins /var/lib/jenkins
 mkdir -p /var/lib/jenkins/.ccache
 chown jenkins:jenkins /var/lib/jenkins/.ccache
 # Allow writing to /usr/local (for make install)
 chown jenkins:jenkins /usr/local
 # Allow sudo
 # TODO: Maybe we shouldn't
 echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins
 # Work around bug where devtoolset replaces sudo and breaks it.
 # With DEVTOOLSET_VERSION set, the absolute /bin/sudo is used instead of
 # whichever "sudo" comes first on PATH.
 if [ -n "$DEVTOOLSET_VERSION" ]; then
  SUDO=/bin/sudo
 else
  SUDO=sudo
 fi
 # Test that sudo works
 $SUDO -u jenkins $SUDO -v
--- a/.ci/docker/common/install_vision.sh
+++ b/.ci/docker/common/install_vision.sh
@ -1,46 +0,0 @@
 #!/bin/bash
 set -ex
 install_ubuntu() {
  # Install the OpenCV development package via apt, then drop apt caches and
  # temporary files.
  apt-get update
  apt-get install -y --no-install-recommends libopencv-dev

  # Cleanup
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 }
 install_centos() {
  # Install the OpenCV development package via yum, then drop yum caches and
  # history.
  # EPEL is needed for many packages we depend on.
  # See http://fedoraproject.org/wiki/EPEL
  yum --enablerepo=extras install -y epel-release
  yum install -y opencv-devel

  # Cleanup
  yum clean all
  rm -rf /var/cache/yum /var/lib/yum/yumdb /var/lib/yum/history
 }
 # Run the installer that matches the distribution named by the ID field of
 # /etc/os-release; any other distribution is an error.
 ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 if [[ "$ID" == "ubuntu" ]]; then
  install_ubuntu
 elif [[ "$ID" == "centos" ]]; then
  install_centos
 else
  echo "Unable to determine OS..."
  exit 1
 fi
 # Cache vision models used by the test
 source "$(dirname "${BASH_SOURCE[0]}")/cache_vision_models.sh"
--- a/.ci/docker/common/install_xpu.sh
+++ b/.ci/docker/common/install_xpu.sh
@ -1,176 +0,0 @@
 #!/bin/bash
 set -xe
 # Script used in CI and CD pipeline
 # Intel® software for general purpose GPU capabilities.
 # Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
 # Users should update to the latest version as it becomes available
 # Install the Intel GPU driver stack and the Intel support packages on
 # Ubuntu (only the "jammy" release is accepted).
 # Reads XPU_DRIVER_TYPE, XPU_DRIVER_VERSION and XPU_PACKAGES.
 # Exits with status 1 on an unsupported release.
 function install_ubuntu() {
    . /etc/os-release
    if [[ ! " jammy " =~ " ${VERSION_CODENAME} " ]]; then
        echo "Ubuntu version ${VERSION_CODENAME} not supported"
        # Fail explicitly: a bare "exit" would return echo's status (0) and
        # make the image build look successful with nothing installed.
        exit 1
    fi
    apt-get update -y
    apt-get install -y gpg-agent wget
    # To add the online network package repository for the GPU Driver
    wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
        | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] \
        https://repositories.intel.com/gpu/ubuntu ${VERSION_CODENAME}${XPU_DRIVER_VERSION} unified" \
        | tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list
    # To add the online network package repository for the Intel Support Packages
    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
        | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg.gpg
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg.gpg] \
        https://apt.repos.intel.com/oneapi all main" \
        | tee /etc/apt/sources.list.d/oneAPI.list
    # Update the packages list and repository index
    apt-get update
    # The xpu-smi packages
    apt-get install -y flex bison xpu-smi
    # The runtime and development package names differ between the LTS and
    # the rolling driver releases.
    if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
        # Compute and Media Runtimes
        apt-get install -y \
            intel-opencl-icd intel-level-zero-gpu level-zero \
            intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
            libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
            libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
            mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
        # Development Packages
        apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
    else # rolling driver
        apt-get install -y \
            intel-opencl-icd libze-intel-gpu1 libze1 \
            intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
            libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
            libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
            mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
        apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
    fi
    # Install Intel Support Packages
    apt-get install -y ${XPU_PACKAGES}
    # Cleanup
    apt-get autoclean && apt-get clean
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 }
 # Install the Intel GPU driver stack and the Intel support packages on RHEL
 # (8.8, 8.9, 9.0, 9.2, 9.3) or AlmaLinux (treated as RHEL 8.8).
 # Reads XPU_DRIVER_VERSION and XPU_PACKAGES.
 # Exits with status 1 on an unsupported RHEL release.
 function install_rhel() {
    . /etc/os-release
    if [[ "${ID}" == "rhel" ]]; then
        if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
            echo "RHEL version ${VERSION_ID} not supported"
            # Fail explicitly: a bare "exit" would return echo's status (0) and
            # make the image build look successful with nothing installed.
            exit 1
        fi
    elif [[ "${ID}" == "almalinux" ]]; then
        # Workaround for almalinux8 which used by quay.io/pypa/manylinux_2_28_x86_64
        VERSION_ID="8.8"
    fi
    dnf install -y 'dnf-command(config-manager)'
    # To add the online network package repository for the GPU Driver
    dnf config-manager --add-repo \
        https://repositories.intel.com/gpu/rhel/${VERSION_ID}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_ID}.repo
    # To add the online network package repository for the Intel Support Packages
    tee > /etc/yum.repos.d/oneAPI.repo << EOF
 [oneAPI]
 name=Intel for Pytorch GPU dev repository
 baseurl=https://yum.repos.intel.com/oneapi
 enabled=1
 gpgcheck=1
 repo_gpgcheck=1
 gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
 EOF
    # Install Intel Support Packages
    yum install -y ${XPU_PACKAGES}
    # The xpu-smi packages
    dnf install -y xpu-smi
    # Compute and Media Runtimes
    # (--skip-broken lets dnf skip packages that cannot be installed.)
    dnf install --skip-broken -y \
        intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2 \
        level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
        mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
        mesa-libxatracker libvpl-tools intel-metrics-discovery \
        intel-metrics-library intel-igc-core intel-igc-cm \
        libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc
    # Development packages
    dnf install -y --refresh \
        intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel
    # Cleanup
    dnf clean all
    rm -rf /var/cache/yum
    rm -rf /var/lib/yum/yumdb
    rm -rf /var/lib/yum/history
 }
 # Install the Intel GPU driver stack and the Intel support packages on SLES
 # (15 SP4 and 15 SP5 only).
 # Reads XPU_DRIVER_VERSION and XPU_PACKAGES.
 # Exits with status 1 on an unsupported release.
 function install_sles() {
    . /etc/os-release
    # "15.4" -> "15sp4": the form used in the repository URL below.
    VERSION_SP=${VERSION_ID//./sp}
    if [[ ! " 15sp4 15sp5 " =~ " ${VERSION_SP} " ]]; then
        echo "SLES version ${VERSION_ID} not supported"
        # Fail explicitly: a bare "exit" would return echo's status (0) and
        # make the image build look successful with nothing installed.
        exit 1
    fi
    # To add the online network package repository for the GPU Driver
    zypper addrepo -f -r \
        https://repositories.intel.com/gpu/sles/${VERSION_SP}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_SP}.repo
    rpm --import https://repositories.intel.com/gpu/intel-graphics.key
    # To add the online network package repository for the Intel Support Packages
    zypper addrepo https://yum.repos.intel.com/oneapi oneAPI
    rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
    # The xpu-smi packages
    zypper install -y lsb-release flex bison xpu-smi
    # Compute and Media Runtimes
    zypper install -y intel-level-zero-gpu level-zero intel-gsc intel-opencl intel-ocloc \
        intel-media-driver libigfxcmrt7 libvpl2 libvpl-tools libmfxgen1 libmfx1
    # Development packages
    zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel
    # Install Intel Support Packages
    zypper install -y ${XPU_PACKAGES}
 }
 # GPU driver channel: rolling releases by default (empty suffix); the LTS
 # channel when XPU_DRIVER_TYPE is "lts" in any letter case.
 case "${XPU_DRIVER_TYPE,,}" in
    lts)
        XPU_DRIVER_VERSION="/lts/2350"
        ;;
    *)
        XPU_DRIVER_VERSION=""
        ;;
 esac
 # Intel® oneAPI Deep Learning Essentials: 2025.2 on request, 2025.1 otherwise.
 case "$XPU_VERSION" in
    2025.2)
        XPU_PACKAGES="intel-deep-learning-essentials-2025.2"
        ;;
    *)
        XPU_PACKAGES="intel-deep-learning-essentials-2025.1"
        ;;
 esac
 # Run the installer that matches the distribution named by the ID field of
 # /etc/os-release; any other distribution is an error.
 ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 if [[ "$ID" == "ubuntu" ]]; then
    install_ubuntu
 elif [[ "$ID" == "rhel" || "$ID" == "almalinux" ]]; then
    install_rhel
 elif [[ "$ID" == "sles" ]]; then
    install_sles
 else
    echo "Unable to determine OS..."
    exit 1
 fi
--- a/.ci/docker/common/patch_libstdc.sh
+++ b/.ci/docker/common/patch_libstdc.sh
@ -1,9 +0,0 @@
 #!/bin/bash
 set -xe
 # Script used in Linux x86 and aarch64 CD pipeline
 # Workaround for exposing statically linked libstdc++ CXX11 ABI symbols.
 # see: https://github.com/pytorch/pytorch/issues/133437
 # Ask gcc where its libstdc++_nonshared.a archive lives.
 LIBNONSHARED=$(gcc -print-file-name=libstdc++_nonshared.a)
 # List the archive's global text ("T") symbols that mention
 # recursive_directory_iterator, keeping only the text from column 20 onward
 # (presumably the symbol name after a 16-digit address and the type letter - confirm).
 nm -g $LIBNONSHARED | grep " T " | grep recursive_directory_iterator | cut -c 20-  > weaken-symbols.txt
 # Rewrite the archive in place with those symbols marked weak.
 objcopy --weaken-symbols weaken-symbols.txt $LIBNONSHARED $LIBNONSHARED
--- a/.ci/docker/libtorch/Dockerfile
+++ b/.ci/docker/libtorch/Dockerfile
@ -1,117 +0,0 @@
 # Multi-stage build for the libtorch builder images.
 #   BASE_TARGET - name of the stage the `final` stage is built from
 #   GPU_IMAGE   - base OS image for the `base` stage
 ARG BASE_TARGET=base
 ARG GPU_IMAGE=ubuntu:20.04
 # Stage "base": common OS packages, git configuration and UTF-8 locale.
 FROM ${GPU_IMAGE} AS base
 ENV DEBIAN_FRONTEND=noninteractive
 # Refresh the package index and install in the same layer so that a cached
 # `apt-get update` layer is never paired with a changed package list.
 RUN apt-get clean && apt-get update && \
    apt-get install -y curl locales g++ git-all autoconf automake make cmake wget unzip sudo
 # Just add everything as a safe.directory for git since these will be used in multiple places with git
 RUN git config --global --add safe.directory '*'
 RUN locale-gen en_US.UTF-8
 ENV LC_ALL=en_US.UTF-8
 ENV LANG=en_US.UTF-8
 ENV LANGUAGE=en_US.UTF-8
 # Install openssl
 FROM base AS openssl
 ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
 # Install python
 FROM base AS python
 ADD common/install_cpython.sh install_cpython.sh
 RUN apt-get update -y && \
    apt-get install build-essential gdb lcov libbz2-dev libffi-dev \
        libgdbm-dev liblzma-dev libncurses5-dev libreadline6-dev \
        libsqlite3-dev libssl-dev lzma lzma-dev tk-dev uuid-dev zlib1g-dev -y && \
    bash ./install_cpython.sh && \
    rm install_cpython.sh && \
    apt-get clean
 # Stage "conda": runs install_conda_docker.sh; /opt/conda is copied out of it below.
 FROM base AS conda
 ADD ./common/install_conda_docker.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
 # Stage "cpu": base + conda + python + MKL. Parent of the cuda and rocm stages.
 FROM base AS cpu
 # Install Anaconda
 COPY --from=conda /opt/conda /opt/conda
 # Install python
 COPY --from=python /opt/python    /opt/python
 COPY --from=python /opt/_internal /opt/_internal
 ENV PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
 # Install MKL
 ADD ./common/install_mkl.sh install_mkl.sh
 RUN bash ./install_mkl.sh && rm install_mkl.sh
 # Stage "cuda": only stages the CUDA install scripts; the per-version stages
 # below run them with a concrete version.
 FROM cpu AS cuda
 ADD ./common/install_cuda.sh install_cuda.sh
 ADD ./common/install_magma.sh install_magma.sh
 COPY ./common/install_nccl.sh install_nccl.sh
 COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
 COPY ./common/install_cusparselt.sh install_cusparselt.sh
 ENV CUDA_HOME=/usr/local/cuda
 # One stage per supported CUDA version; each installs CUDA and magma and
 # points /usr/local/cuda at the versioned directory.
 FROM cuda AS cuda12.6
 RUN bash ./install_cuda.sh 12.6
 RUN bash ./install_magma.sh 12.6
 RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda
 FROM cuda AS cuda12.8
 RUN bash ./install_cuda.sh 12.8
 RUN bash ./install_magma.sh 12.8
 RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda
 FROM cuda AS cuda12.9
 RUN bash ./install_cuda.sh 12.9
 RUN bash ./install_magma.sh 12.9
 RUN ln -sf /usr/local/cuda-12.9 /usr/local/cuda
 FROM cuda AS cuda13.0
 RUN bash ./install_cuda.sh 13.0
 RUN bash ./install_magma.sh 13.0
 RUN ln -sf /usr/local/cuda-13.0 /usr/local/cuda
 # Install libibverbs for libtorch and copy to CUDA directory
 # NOTE(review): as written this step is part of the cuda13.0 stage only;
 # confirm whether the other CUDA stages are meant to have it as well.
 RUN apt-get update -y && \
    apt-get install -y libibverbs-dev librdmacm-dev && \
    cp /usr/lib/x86_64-linux-gnu/libmlx5.so* /usr/local/cuda/lib64/ && \
    cp /usr/lib/x86_64-linux-gnu/librdmacm.so* /usr/local/cuda/lib64/ && \
    cp /usr/lib/x86_64-linux-gnu/libibverbs.so* /usr/local/cuda/lib64/ && \
    cp /usr/lib/x86_64-linux-gnu/libnl* /usr/local/cuda/lib64/
 # Stage "rocm": cpu stage plus the ROCm libdrm replacement and magma.
 FROM cpu AS rocm
 ARG ROCM_VERSION
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
 ENV MKLROOT=/opt/intel
 # Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0)
 # find HIP works for ROCm5.7. Not needed for ROCm6.0 and above.
 # Remove below when ROCm5.7 is not in support matrix anymore.
 ENV ROCM_PATH=/opt/rocm
 # No need to install ROCm as base docker image should have full ROCm install
 #ADD ./common/install_rocm.sh install_rocm.sh
 ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
 ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
 # gfortran and python needed for building magma from source for ROCm
 RUN apt-get update -y && \
    apt-get install gfortran -y && \
    apt-get install python3 python-is-python3 -y && \
    apt-get clean
 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
 RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
 # Stage "final": the stage selected by BASE_TARGET plus openssl, patchelf,
 # conda and python.
 FROM ${BASE_TARGET} AS final
 COPY --from=openssl            /opt/openssl           /opt/openssl
 # Install patchelf
 ADD ./common/install_patchelf.sh install_patchelf.sh
 RUN bash ./install_patchelf.sh && rm install_patchelf.sh
 # Install Anaconda
 COPY --from=conda /opt/conda /opt/conda
 # Install python
 COPY --from=python /opt/python    /opt/python
 COPY --from=python /opt/_internal /opt/_internal
 ENV PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
--- a/.ci/docker/libtorch/build.sh
+++ b/.ci/docker/libtorch/build.sh
@ -1,75 +0,0 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline
 #
 # Usage: build.sh IMAGENAME:ARCHTAG [extra `docker build` arguments...]
 # Builds the `final` target of .ci/docker/libtorch/Dockerfile, choosing the
 # base stage and base image from the tag part of the image name.
 set -eoux pipefail
 # Use ${1:-} and validate before `shift`: with `set -eu` a bare "$1" (or a
 # shift with no arguments) aborts before the usage message can be printed.
 image="${1:-}"
 if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGENAME:ARCHTAG"
  exit 1
 fi
 shift
 TOPDIR=$(git rev-parse --show-toplevel)
 DOCKER=${DOCKER:-docker}
 # Go from imagename:tag to tag
 DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
 GPU_ARCH_VERSION=""
 if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
    # extract cuda version from image name.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
 elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
    # extract rocm version from image name.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
 fi
 # Select the Dockerfile stage (BASE_TARGET), the base image (GPU_IMAGE) and
 # any accelerator-specific --build-arg flags.
 case ${DOCKER_TAG_PREFIX} in
    cpu)
        BASE_TARGET=cpu
        GPU_IMAGE=ubuntu:20.04
        DOCKER_GPU_BUILD_ARG=""
        ;;
    cuda*)
        BASE_TARGET=cuda${GPU_ARCH_VERSION}
        GPU_IMAGE=ubuntu:20.04
        DOCKER_GPU_BUILD_ARG=""
        ;;
    rocm*)
        # we want the patch version of 7.0 instead
        if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
            GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
        fi
        # we want the patch version of 6.4 instead
        if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
            GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.4"
        fi
        BASE_TARGET=rocm
        GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
        # add gfx950, gfx115x conditionally starting in ROCm 7.0
        if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
            PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
        fi
        DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
        ;;
    *)
        echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"
        exit 1
        ;;
 esac
 # Lower-cased random name from mktemp, used as a throwaway image tag.
 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
 # DOCKER_GPU_BUILD_ARG is left unquoted on purpose: it holds several
 # space-separated flags that must be split into separate words.
 # "$@" is quoted so caller-supplied arguments are forwarded unchanged.
 DOCKER_BUILDKIT=1 ${DOCKER} build \
    --target final \
    ${DOCKER_GPU_BUILD_ARG} \
    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
    --build-arg "BASE_TARGET=${BASE_TARGET}" \
    -t "${tmp_tag}" \
    "$@" \
    -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
    "${TOPDIR}/.ci/docker/"
--- a/.ci/docker/linter-cuda/Dockerfile
+++ b/.ci/docker/linter-cuda/Dockerfile
@ -1,48 +0,0 @@
 # Linter image with a CUDA toolkit: Ubuntu base, CI Python environment,
 # CUDA (version from CUDA_VERSION) and the linter tooling. Runs as `jenkins`.
 ARG UBUNTU_VERSION
 FROM ubuntu:${UBUNTU_VERSION}
 # Re-declared after FROM so the value is visible inside the build stage.
 ARG UBUNTU_VERSION
 ENV DEBIAN_FRONTEND=noninteractive
 # Install common dependencies (so that this step can be cached separately)
 COPY ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh
 # Install missing libomp-dev
 RUN apt-get update && apt-get install -y --no-install-recommends libomp-dev && apt-get autoclean && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 # Install user
 COPY ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh
 # Install Python and the CI requirements via install_python.sh
 ARG PYTHON_VERSION
 ARG PIP_CMAKE
 # Put venv into the env vars so users don't need to activate it
 ENV PATH=/var/lib/jenkins/ci_env/bin:$PATH
 ENV VIRTUAL_ENV=/var/lib/jenkins/ci_env
 COPY requirements-ci.txt /opt/requirements-ci.txt
 COPY ./common/install_python.sh install_python.sh
 RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
 # Install cuda and cudnn
 ARG CUDA_VERSION
 COPY ./common/install_cuda.sh install_cuda.sh
 COPY ./common/install_nccl.sh install_nccl.sh
 COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
 COPY ./common/install_cusparselt.sh install_cusparselt.sh
 RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh
 ENV DESIRED_CUDA=${CUDA_VERSION}
 ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
 # Note that Docker build forbids copying file outside the build context
 COPY ./common/install_linter.sh install_linter.sh
 # Run and remove the script in one layer, like the other install steps above.
 RUN bash ./install_linter.sh && rm install_linter.sh
 RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
 USER jenkins
 CMD ["bash"]
--- a/.ci/docker/linter/Dockerfile
+++ b/.ci/docker/linter/Dockerfile
@ -1,33 +0,0 @@
 # Linter image: Ubuntu base, CI Python environment and the linter tooling.
 # Runs as the `jenkins` user.
 ARG UBUNTU_VERSION
 FROM ubuntu:${UBUNTU_VERSION}
 # Re-declared after FROM so the value is visible inside the build stage.
 ARG UBUNTU_VERSION
 ENV DEBIAN_FRONTEND=noninteractive
 # Install common dependencies (so that this step can be cached separately)
 COPY ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh
 # Install user
 COPY ./common/install_user.sh install_user.sh
 RUN bash ./install_user.sh && rm install_user.sh
 # Install Python and the CI requirements via install_python.sh
 ARG PYTHON_VERSION
 # Put the environment on PATH so users don't need to activate it
 ENV PATH=/var/lib/jenkins/ci_env/bin:$PATH
 ENV VIRTUAL_ENV=/var/lib/jenkins/ci_env
 COPY requirements-ci.txt /opt/requirements-ci.txt
 COPY ./common/install_python.sh install_python.sh
 RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
 # Note that Docker build forbids copying file outside the build context
 COPY ./common/install_linter.sh install_linter.sh
 # Run and remove the script in one layer, like the other install steps above.
 RUN bash ./install_linter.sh && rm install_linter.sh
 RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
 USER jenkins
 CMD ["bash"]
--- a/.ci/docker/manywheel/Dockerfile_2_28
+++ b/.ci/docker/manywheel/Dockerfile_2_28
@ -1,181 +0,0 @@
 # syntax = docker/dockerfile:experimental
 # Multi-stage build for the manylinux 2.28 x86_64 builder images.
 # Helper stages (base, openssl, cuda, intel, magma, jni, libpng) each build one
 # component; `common` assembles them on top of GPU_IMAGE, and the selectable
 # targets are cpu_final, cuda_final, rocm_final and xpu_final.
 ARG BASE_CUDA_VERSION=11.8
 ARG GPU_IMAGE=amd64/almalinux:8
 # Stage "base": manylinux image plus build tools, gcc-toolset and cmake.
 FROM quay.io/pypa/manylinux_2_28_x86_64 as base
 ENV LC_ALL en_US.UTF-8
 ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8
 ARG DEVTOOLSET_VERSION=13
 RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel yum-utils gcc-toolset-${DEVTOOLSET_VERSION}-gcc gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran gcc-toolset-${DEVTOOLSET_VERSION}-gdb
 # Put the gcc-toolset binaries and libraries ahead of the system ones.
 ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 # cmake-3.18.4 from pip
 RUN yum install -y python3-pip && \
    python3 -mpip install cmake==3.18.4 && \
    ln -s /usr/local/bin/cmake /usr/bin/cmake3
 FROM base as openssl
 # Install openssl (this must precede `build python` step)
 # (In order to have a proper SSL module, Python is compiled
 # against a recent openssl [see env vars above], which is linked
 # statically. We delete openssl afterwards.)
 ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
 # remove unnecessary python versions
 RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
 RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
 # Stage "cuda": installs the CUDA toolkit for BASE_CUDA_VERSION; cuda_final
 # copies /usr/local/cuda-<version> out of this stage.
 FROM base as cuda
 ARG BASE_CUDA_VERSION=12.6
 # Install CUDA
 ADD ./common/install_cuda.sh install_cuda.sh
 COPY ./common/install_nccl.sh install_nccl.sh
 COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
 COPY ./common/install_cusparselt.sh install_cusparselt.sh
 RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh
 FROM base as intel
 # MKL
 ADD ./common/install_mkl.sh install_mkl.sh
 RUN bash ./install_mkl.sh && rm install_mkl.sh
 FROM base as magma
 ARG BASE_CUDA_VERSION=12.6
 # Install magma
 ADD ./common/install_magma.sh install_magma.sh
 RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
 FROM base as jni
 # Install java jni header
 ADD ./common/install_jni.sh install_jni.sh
 ADD ./java/jni.h jni.h
 RUN bash ./install_jni.sh && rm install_jni.sh
 FROM base as libpng
 # Install libpng
 ADD ./common/install_libpng.sh install_libpng.sh
 RUN bash ./install_libpng.sh && rm install_libpng.sh
 # Stage "common": starts from GPU_IMAGE (not from `base`), installs the OS
 # packages and then pulls in the artifacts built by the stages above.
 FROM ${GPU_IMAGE} as common
 ARG DEVTOOLSET_VERSION=13
 ENV LC_ALL en_US.UTF-8
 ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8
 RUN yum -y install epel-release
 RUN yum -y update
 RUN yum install -y \
        autoconf \
        automake \
        bison \
        bzip2 \
        curl \
        diffutils \
        file \
        git \
        make \
        patch \
        perl \
        unzip \
        util-linux \
        wget \
        which \
        xz \
        glibc-langpack-en \
        gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
        gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
        gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
        gcc-toolset-${DEVTOOLSET_VERSION}-gdb
 # git236+ would refuse to run git commands in repos owned by other users
 # Which causes version check to fail, as pytorch repo is bind-mounted into the image
 # Override this behaviour by treating every folder as safe
 # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
 RUN git config --global --add safe.directory "*"
 ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 # Copy the artifacts produced by the helper stages (openssl, python,
 # auditwheel, MKL, patchelf, libpng, jni.h) into this image.
 COPY --from=openssl            /opt/openssl                          /opt/openssl
 COPY --from=base               /opt/python                           /opt/python
 COPY --from=base               /usr/local/lib/                       /usr/local/lib/
 COPY --from=base               /opt/_internal                        /opt/_internal
 COPY --from=base               /usr/local/bin/auditwheel             /usr/local/bin/auditwheel
 COPY --from=intel              /opt/intel                            /opt/intel
 COPY --from=base               /usr/local/bin/patchelf               /usr/local/bin/patchelf
 COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/
 COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/
 COPY --from=libpng             /usr/local/include/png*               /usr/local/include/
 COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/
 COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/
 COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig
 COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h
 # Stage "cpu_final": CPU builder image and parent of the cuda/rocm/xpu finals.
 FROM common as cpu_final
 ARG BASE_CUDA_VERSION=12.6
 ARG DEVTOOLSET_VERSION=13
 # Install Anaconda
 ADD ./common/install_conda_docker.sh install_conda.sh
 RUN bash ./install_conda.sh && rm install_conda.sh
 ENV PATH /opt/conda/bin:$PATH
 # Ensure the expected devtoolset is used
 ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 # Install setuptools and wheel for python 3.12/3.13
 RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
    /opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
    done;
 # Run the libstdc++ patch script from common/ and remove it afterwards.
 ADD ./common/patch_libstdc.sh patch_libstdc.sh
 RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
 # cmake-3.18.4 from pip; force in case cmake3 already exists
 RUN yum install -y python3-pip && \
    python3 -mpip install cmake==3.18.4 && \
    ln -sf /usr/local/bin/cmake /usr/bin/cmake3
 # Stage "cuda_final": cpu_final plus the CUDA toolkit and magma copied from
 # their build stages, with /usr/local/cuda pointing at the versioned directory.
 FROM cpu_final as cuda_final
 RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
 RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
 ENV PATH=/usr/local/cuda/bin:$PATH
 # Stage "rocm_final": cpu_final plus newer cmake, the libdrm replacement,
 # magma and MIOpen. DEVTOOLSET_VERSION defaults to 11 in this stage.
 FROM cpu_final as rocm_final
 ARG ROCM_VERSION=6.0
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
 ARG DEVTOOLSET_VERSION=11
 ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
 # Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
 # below workaround helps avoid error
 ENV ROCM_PATH /opt/rocm
 # cmake-3.28.4 from pip to get enable_language(HIP)
 # and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker
 RUN python3 -m pip install --upgrade pip && \
    python3 -mpip install cmake==3.28.4
 # replace the libdrm in /opt/amdgpu with custom amdgpu.ids lookup path
 ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
 RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
 # ROCm 6.4 rocm-smi depends on system drm.h header
 RUN yum install -y libdrm-devel
 ENV MKLROOT /opt/intel
 ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
 RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
 ADD ./common/install_miopen.sh install_miopen.sh
 RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
 # Stage "xpu_final": cpu_final plus newer cmake and the Intel XPU packages
 # installed by install_xpu.sh (driven by XPU_DRIVER_TYPE and XPU_VERSION).
 FROM cpu_final as xpu_final
 # XPU CD use rolling driver
 ENV XPU_DRIVER_TYPE ROLLING
 # cmake-3.28.4 from pip
 RUN python3 -m pip install --upgrade pip && \
    python3 -mpip install cmake==3.28.4
 ADD ./common/install_xpu.sh install_xpu.sh
 ENV XPU_VERSION 2025.2
 RUN bash ./install_xpu.sh && rm install_xpu.sh
 # Unpack the static-libs archive in /opt/_internal.
 RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd
--- a/.ci/docker/manywheel/Dockerfile_2_28_aarch64
+++ b/.ci/docker/manywheel/Dockerfile_2_28_aarch64
@ -1,83 +0,0 @@
 # Multi-stage build for the manylinux 2.28 aarch64 CPU builder image.
 # Stages: base (toolchain), openblas, arm_compute (Arm Compute Library), final.
 FROM quay.io/pypa/manylinux_2_28_aarch64 as base
 # gcc-toolset version used for the packages and PATH entries below.
 ARG GCCTOOLSET_VERSION=13
 # Language variables
 ENV LC_ALL=en_US.UTF-8
 ENV LANG=en_US.UTF-8
 ENV LANGUAGE=en_US.UTF-8
 # Installed needed OS packages. This is to support all
 # the binary builds (torch, vision, audio, text, data)
 RUN yum -y install epel-release
 RUN yum -y update
 RUN yum install -y \
  autoconf \
  automake \
  bison \
  bzip2 \
  curl \
  diffutils \
  file \
  git \
  less \
  libffi-devel \
  libgomp \
  make \
  openssl-devel \
  patch \
  perl \
  unzip \
  util-linux \
  wget \
  which \
  xz \
  yasm \
  zstd \
  sudo \
  gcc-toolset-${GCCTOOLSET_VERSION}-gcc \
  gcc-toolset-${GCCTOOLSET_VERSION}-gcc-c++ \
  gcc-toolset-${GCCTOOLSET_VERSION}-gcc-gfortran \
  gcc-toolset-${GCCTOOLSET_VERSION}-gdb
 # (optional) Install non-default Ninja version
 # install_ninja.sh only runs when NINJA_VERSION is non-empty; the script is
 # removed either way.
 ARG NINJA_VERSION
 COPY ./common/install_ninja.sh install_ninja.sh
 RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
 RUN rm install_ninja.sh
 # Ensure the expected devtoolset is used
 ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 # git236+ would refuse to run git commands in repos owned by other users
 # Which causes version check to fail, as pytorch repo is bind-mounted into the image
 # Override this behaviour by treating every folder as safe
 # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
 RUN git config --global --add safe.directory "*"
 FROM base as openblas
 # Install openblas
 ARG OPENBLAS_VERSION
 ADD ./common/install_openblas.sh install_openblas.sh
 RUN bash ./install_openblas.sh && rm install_openblas.sh
 # Install Arm Compute Library
 FROM base as arm_compute
 # use python3.9 to install scons
 RUN python3.9 -m pip install scons==4.7.0
 RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin
 COPY ./common/install_acl.sh install_acl.sh
 RUN bash ./install_acl.sh && rm install_acl.sh
 # Stage "final": base plus OpenBLAS and ACL copied from their build stages,
 # followed by the libstdc++ patch script.
 FROM base as final
 # remove unnecessary python versions
 RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
 RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
 COPY --from=openblas     /opt/OpenBLAS/  /opt/OpenBLAS/
 COPY --from=arm_compute /acl /acl
 # Make the copied OpenBLAS and ACL libraries visible to the dynamic loader.
 ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:/acl/build/:$LD_LIBRARY_PATH
 ADD ./common/patch_libstdc.sh patch_libstdc.sh
 RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
--- a/.ci/docker/manywheel/Dockerfile_cuda_aarch64
+++ b/.ci/docker/manywheel/Dockerfile_cuda_aarch64
@ -1,110 +0,0 @@
 # Multi-stage build for the manylinux 2.28 aarch64 CUDA builder image.
 # Helper stages build openssl, CUDA, magma, NVPL and the Arm Compute Library;
 # cuda_final assembles them.
 FROM quay.io/pypa/manylinux_2_28_aarch64 as base
 # NOTE(review): this comment used to read "Cuda ARM build needs gcc 11", but
 # the default below is 13 - confirm which toolset version is intended.
 ARG DEVTOOLSET_VERSION=13
 # Language variables
 ENV LC_ALL=en_US.UTF-8
 ENV LANG=en_US.UTF-8
 ENV LANGUAGE=en_US.UTF-8
 # Installed needed OS packages. This is to support all
 # the binary builds (torch, vision, audio, text, data)
 RUN yum -y install epel-release
 RUN yum -y update
 RUN yum install -y \
  autoconf \
  automake \
  bison \
  bzip2 \
  curl \
  diffutils \
  file \
  git \
  make \
  patch \
  perl \
  unzip \
  util-linux \
  wget \
  which \
  xz \
  yasm \
  less \
  zstd \
  libgomp \
  sudo \
  gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
  gcc-toolset-${DEVTOOLSET_VERSION}-gdb
 # Ensure the expected devtoolset is used
 ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 # git236+ would refuse to run git commands in repos owned by other users
 # Which causes version check to fail, as pytorch repo is bind-mounted into the image
 # Override this behaviour by treating every folder as safe
 # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
 RUN git config --global --add safe.directory "*"
 FROM base as openssl
 # Install openssl (this must precede `build python` step)
 # (In order to have a proper SSL module, Python is compiled
 # against a recent openssl [see env vars above], which is linked
 # statically. We delete openssl afterwards.)
 ADD ./common/install_openssl.sh install_openssl.sh
 RUN bash ./install_openssl.sh && rm install_openssl.sh
 ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 # NOTE(review): the stage name `final` is declared here and again further
 # down (`FROM base as final`); confirm which one `FROM final` is meant to use.
 FROM openssl as final
 # remove unnecessary python versions
 RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
 RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
 # Stage "cuda": installs the CUDA toolkit for BASE_CUDA_VERSION (no default
 # here, so the value must come from --build-arg).
 FROM base as cuda
 ARG BASE_CUDA_VERSION
 # Install CUDA
 ADD ./common/install_cuda.sh install_cuda.sh
 COPY ./common/install_nccl.sh install_nccl.sh
 COPY ./common/install_cusparselt.sh install_cusparselt.sh
 COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
 RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh
 FROM base as magma
 ARG BASE_CUDA_VERSION
 # Install magma
 ADD ./common/install_magma.sh install_magma.sh
 RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
 FROM base as nvpl
 # Install nvpl
 ADD ./common/install_nvpl.sh install_nvpl.sh
 RUN bash ./install_nvpl.sh && rm install_nvpl.sh
 # Install Arm Compute Library
 FROM base as arm_compute
 # use python3.9 to install scons
 RUN python3.9 -m pip install scons==4.7.0
 RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin
 COPY ./common/install_acl.sh install_acl.sh
 RUN bash ./install_acl.sh && rm install_acl.sh
 FROM base as final
 # Stage "cuda_final": CUDA and magma trees, NVPL libraries/headers and ACL
 # copied from their build stages, then the libstdc++ patch script.
 FROM final as cuda_final
 ARG BASE_CUDA_VERSION
 RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
 COPY --from=nvpl /opt/nvpl/lib/  /usr/local/lib/
 COPY --from=nvpl /opt/nvpl/include/  /usr/local/include/
 COPY --from=arm_compute /acl /acl
 RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
 ENV PATH=/usr/local/cuda/bin:$PATH
 ENV LD_LIBRARY_PATH=/acl/build/:$LD_LIBRARY_PATH
 ADD ./common/patch_libstdc.sh patch_libstdc.sh
 RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
--- a/.ci/docker/manywheel/Dockerfile_s390x
+++ b/.ci/docker/manywheel/Dockerfile_s390x
@ -1,144 +0,0 @@
 # Multi-stage build for the manylinux 2.28 s390x builder image.
 # Stages: base (toolchain and system packages), patchelf, python (CPython
 # rebuilt via manywheel/build_scripts), final (assembly, test dependencies
 # and onnxruntime built from source).
 FROM quay.io/pypa/manylinux_2_28_s390x as base
 # Language variables
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
 ENV LANGUAGE=C.UTF-8
 # there is a bugfix in gcc >= 14 for precompiled headers and s390x vectorization interaction.
 # with earlier gcc versions test/inductor/test_cpu_cpp_wrapper.py will fail.
 ARG DEVTOOLSET_VERSION=14
 # Installed needed OS packages. This is to support all
 # the binary builds (torch, vision, audio, text, data)
 RUN yum -y install epel-release
 RUN yum -y update
 RUN yum install -y \
  sudo \
  autoconf \
  automake \
  bison \
  bzip2 \
  curl \
  diffutils \
  file \
  git \
  make \
  patch \
  perl \
  unzip \
  util-linux \
  wget \
  which \
  xz \
  yasm \
  less \
  zstd \
  libgomp \
  gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
  gcc-toolset-${DEVTOOLSET_VERSION}-binutils \
  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
  cmake \
  rust \
  cargo \
  llvm-devel \
  libzstd-devel \
  python3.12-devel \
  python3.12-test \
  python3.12-setuptools \
  python3.12-pip \
  python3-virtualenv \
  python3.12-pyyaml \
  python3.12-numpy \
  python3.12-wheel \
  python3.12-cryptography \
  blas-devel \
  openblas-devel \
  lapack-devel \
  atlas-devel \
  libjpeg-devel \
  libxslt-devel \
  libxml2-devel \
  openssl-devel \
  valgrind \
  ninja-build
 # Put the gcc-toolset binaries and libraries ahead of the system ones.
 ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
 ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
 # git236+ would refuse to run git commands in repos owned by other users
 # Which causes version check to fail, as pytorch repo is bind-mounted into the image
 # Override this behaviour by treating every folder as safe
 # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
 RUN git config --global --add safe.directory "*"
 # installed python doesn't have development parts. Rebuild it from scratch
 RUN /bin/rm -rf /opt/_internal /opt/python /usr/local/*/*
 # EPEL for cmake
 FROM base as patchelf
 # Install patchelf
 ADD ./common/install_patchelf.sh install_patchelf.sh
 RUN bash ./install_patchelf.sh && rm install_patchelf.sh
 RUN cp $(which patchelf) /patchelf
 FROM patchelf as python
 # build python
 COPY manywheel/build_scripts /build_scripts
 ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
 # SSL_CERT_FILE is set to the empty string for this stage.
 ENV SSL_CERT_FILE=
 RUN bash build_scripts/build.sh && rm -r build_scripts
 # Stage "final": base plus the rebuilt Python trees, auditwheel and patchelf.
 FROM base as final
 COPY --from=python             /opt/python                           /opt/python
 COPY --from=python             /opt/_internal                        /opt/_internal
 COPY --from=python             /opt/python/cp39-cp39/bin/auditwheel  /usr/local/bin/auditwheel
 COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf
 # Make the system python3.12 the default `python` and `python3`.
 RUN alternatives --set python /usr/bin/python3.12
 RUN alternatives --set python3 /usr/bin/python3.12
 RUN pip-3.12 install typing_extensions
 ENTRYPOINT []
 CMD ["/bin/bash"]
 # install test dependencies:
 # - grpcio requires system openssl, bundled crypto fails to build
 RUN dnf install -y \
  hdf5-devel \
  python3-h5py \
  git
 RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
 # cmake-3.28.0 from pip for onnxruntime
 RUN python3 -mpip install cmake==3.28.0
 ADD ./common/patch_libstdc.sh patch_libstdc.sh
 RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
 # build onnxruntime 1.21.0 from sources.
 # it is not possible to build it from sources using pip,
 # so just build it from upstream repository.
 # h5py is dependency of onnxruntime_training.
 # h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository.
 # h5py 3.11.0 doesn't build with numpy >= 2.3.0.
 # install newest flatbuffers version first:
 # for some reason old version is getting pulled in otherwise.
 # packaging package is required for onnxruntime wheel build.
 # The checkout is patched with upstream commit f57db797 before building, and
 # the source tree is deleted once the wheel is installed.
 RUN pip3 install flatbuffers && \
  pip3 install cython 'pkgconfig>=1.5.5' 'setuptools>=77' 'numpy<2.3.0' && \
  pip3 install --no-build-isolation h5py==3.11.0 && \
  pip3 install packaging && \
  git clone https://github.com/microsoft/onnxruntime && \
  cd onnxruntime && git checkout v1.21.0 && \
  git submodule update --init --recursive && \
  wget https://github.com/microsoft/onnxruntime/commit/f57db79743c4d1a3553aa05cf95bcd10966030e6.patch && \
  patch -p1 < f57db79743c4d1a3553aa05cf95bcd10966030e6.patch && \
  ./build.sh --config Release --parallel 0 --enable_pybind \
  --build_wheel --enable_training --enable_training_apis \
  --enable_training_ops --skip_tests --allow_running_as_root \
  --compile_no_warning_as_error && \
  pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \
  cd .. && /bin/rm -rf ./onnxruntime
--- a/.ci/docker/manywheel/build.sh
+++ b/.ci/docker/manywheel/build.sh
@ -1,132 +0,0 @@
 #!/usr/bin/env bash
 # Script used only in CD pipeline
 #
 # Usage: build.sh IMAGE:ARCHTAG [extra `docker build` arguments...]
 # Maps the image name to a Dockerfile under .ci/docker/manywheel/, a build
 # target and a set of --build-arg flags, then runs `docker build`.
 set -exou pipefail
 TOPDIR=$(git rev-parse --show-toplevel)
 # Use ${1:-} and validate before `shift`: with `set -eu` a bare "$1" (or a
 # shift with no arguments) aborts before the usage message can be printed.
 image="${1:-}"
 if [ -z "${image}" ]; then
  echo "Usage: $0 IMAGE:ARCHTAG"
  exit 1
 fi
 shift
 # Go from imagename:tag to tag
 DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
 GPU_ARCH_VERSION=""
 if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
    # extract cuda version from image name.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
 elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
    # extract rocm version from image name.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
 fi
 # Optional overrides from the environment; default to empty under `set -u`.
 MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
 DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
 OPENBLAS_VERSION=${OPENBLAS_VERSION:-}
 ACL_VERSION=${ACL_VERSION:-}
 # Per-image settings: build target, base image, extra build args and the
 # Dockerfile suffix (derived from MANY_LINUX_VERSION unless set explicitly).
 case ${image} in
    manylinux2_28-builder:cpu)
        TARGET=cpu_final
        GPU_IMAGE=amd64/almalinux:8
        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13"
        MANY_LINUX_VERSION="2_28"
        ;;
    manylinux2_28_aarch64-builder:cpu-aarch64)
        TARGET=final
        GPU_IMAGE=arm64v8/almalinux:8
        # NOTE(review): Dockerfile_2_28_aarch64 declares GCCTOOLSET_VERSION, not
        # DEVTOOLSET_VERSION - confirm this build-arg is actually consumed.
        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1"
        MANY_LINUX_VERSION="2_28_aarch64"
        ;;
    manylinuxs390x-builder:cpu-s390x)
        TARGET=final
        GPU_IMAGE=s390x/almalinux:8
        DOCKER_GPU_BUILD_ARG=""
        MANY_LINUX_VERSION="s390x"
        ;;
    manylinux2_28-builder:cuda11*)
        TARGET=cuda_final
        GPU_IMAGE=amd64/almalinux:8
        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
        MANY_LINUX_VERSION="2_28"
        ;;
    # CUDA 12.x and 13.x share the same settings.
    manylinux2_28-builder:cuda12*|manylinux2_28-builder:cuda13*)
        TARGET=cuda_final
        GPU_IMAGE=amd64/almalinux:8
        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
        MANY_LINUX_VERSION="2_28"
        ;;
    manylinuxaarch64-builder:cuda*)
        TARGET=cuda_final
        GPU_IMAGE=amd64/almalinux:8
        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
        MANY_LINUX_VERSION="aarch64"
        DOCKERFILE_SUFFIX="_cuda_aarch64"
        ;;
    manylinux2_28-builder:rocm*)
        # we want the patch version of 7.0 instead
        if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
            GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
        fi
        # we want the patch version of 6.4 instead
        if [[ "$GPU_ARCH_VERSION" == *"6.4"* ]]; then
            GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.4"
        fi
        TARGET=rocm_final
        MANY_LINUX_VERSION="2_28"
        DEVTOOLSET_VERSION="11"
        GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
        # add gfx950, gfx115x conditionally starting in ROCm 7.0
        if [[ "$GPU_ARCH_VERSION" == *"7.0"* ]]; then
            PYTORCH_ROCM_ARCH="${PYTORCH_ROCM_ARCH};gfx950;gfx1150;gfx1151"
        fi
        DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
        ;;
    manylinux2_28-builder:xpu)
        TARGET=xpu_final
        GPU_IMAGE=amd64/almalinux:8
        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
        MANY_LINUX_VERSION="2_28"
        ;;
    *)
        echo "ERROR: Unrecognized image name: ${image}"
        exit 1
        ;;
 esac
 if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
    DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION}
 fi
 # Only activate this if in CI
 if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
    sudo systemctl daemon-reload
    sudo systemctl restart docker
 fi
 # Lower-cased random name from mktemp, used as a throwaway image tag.
 tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
 # DOCKER_GPU_BUILD_ARG is left unquoted on purpose: it holds several
 # space-separated flags that must be split into separate words.
 # "$@" is quoted so caller-supplied arguments are forwarded unchanged.
 DOCKER_BUILDKIT=1 docker build  \
    ${DOCKER_GPU_BUILD_ARG} \
    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
    --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION:-}" \
    --build-arg "ACL_VERSION=${ACL_VERSION:-}" \
    --target "${TARGET}" \
    -t "${tmp_tag}" \
    "$@" \
    -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
    "${TOPDIR}/.ci/docker/"
--- a/.ci/docker/manywheel/build_scripts/build.sh
+++ b/.ci/docker/manywheel/build_scripts/build.sh
@ -1,118 +0,0 @@
 #!/bin/bash
 # Top-level build script called from Dockerfile
 # Script used only in CD pipeline
 # Stop at any error, show all commands
 set -ex
 # openssl version to build, with expected sha256 hash of .tar.gz
 # archive
 OPENSSL_ROOT=openssl-1.1.1l
 OPENSSL_HASH=0b7a3e5e59c34827fe0c3a74b7ec8baef302b98fa80088d7f9153aa16fa76bd1
 # NOTE(review): DEVTOOLS_HASH is not referenced anywhere else in this script,
 # and PATCHELF_HASH is only referenced by a commented-out check further down.
 DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
 PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
 # curl and autoconf releases to build, each with the sha256 of its archive
 CURL_ROOT=curl-7.73.0
 CURL_HASH=cf34fe0b07b800f1c01a499a6e8b2af548f6d0e044dca4a29d88a4bee146d131
 AUTOCONF_ROOT=autoconf-2.69
 AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
 # Dependencies for compiling Python that we want to remove from
 # the final image after compiling Python
 PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel"
 # The Berkeley DB development package is db4-devel everywhere except on
 # s390x, where libdb-devel is used instead.
 if [ "$(uname -m)" != "s390x" ] ; then
    PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} db4-devel"
 else
    PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} libdb-devel"
 fi
 # Libraries that are allowed as part of the manylinux1 profile
 MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"
 # Get build utilities
 # (build_utils.sh defines the build_autoconf, build_openssl and build_curl
 # helpers called below)
 MY_DIR=$(dirname "${BASH_SOURCE[0]}")
 source $MY_DIR/build_utils.sh
 # Development tools and libraries
 yum -y install bzip2 make git patch unzip bison yasm diffutils \
    automake which file \
    ${PYTHON_COMPILE_DEPS}
 # Install newest autoconf
 build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
 autoconf --version
 # Compile the latest Python releases.
 # (In order to have a proper SSL module, Python is compiled
 # against a recent openssl [see env vars above], which is linked
 # statically. We delete openssl afterwards.)
 build_openssl $OPENSSL_ROOT $OPENSSL_HASH
 /build_scripts/install_cpython.sh
 # Interpreter whose pip is used to install the helper tools (certifi, auditwheel)
 PY39_BIN=/opt/python/cp39-cp39/bin
 # Our openssl doesn't know how to find the system CA trust store
 #   (https://github.com/pypa/manylinux/issues/53)
 # And it's not clear how up-to-date that is anyway
 # So let's just use the same one pip and everyone uses
 $PY39_BIN/pip install certifi
 # Expose certifi's CA bundle at a fixed path via a symlink
 ln -s $($PY39_BIN/python -c 'import certifi; print(certifi.where())') \
      /opt/_internal/certs.pem
 # If you modify this line you also have to modify the versions in the
 # Dockerfiles:
 export SSL_CERT_FILE=/opt/_internal/certs.pem
 # Install newest curl
 build_curl $CURL_ROOT $CURL_HASH
 # Remove curl's development headers, libcurl libraries and pkg-config file
 # from /usr/local again
 rm -rf /usr/local/include/curl /usr/local/lib/libcurl* /usr/local/lib/pkgconfig/libcurl.pc
 # Forget cached command locations so the following lookups search PATH afresh
 hash -r
 curl --version
 curl-config --features
 # Install patchelf (latest with unreleased bug fixes)
 # NOTE(review): -k turns off TLS certificate verification, and the checksum
 # check below is commented out (it also names a different tarball than the
 # one downloaded), so this archive is built without any integrity check.
 curl -sLOk https://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.gz
 # check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH
 tar -xzf patchelf-0.10.tar.gz
 # Build in a subshell so the working directory of this script is unchanged
 (cd patchelf-0.10 && ./configure && make && make install)
 rm -rf patchelf-0.10.tar.gz patchelf-0.10
 # Install latest pypi release of auditwheel
 $PY39_BIN/pip install auditwheel
 # Make auditwheel reachable as /usr/local/bin/auditwheel
 ln -s $PY39_BIN/auditwheel /usr/local/bin/auditwheel
 # Clean up development headers and other unnecessary stuff for
 # final image
 # Best effort: the output is discarded and a failure (for example a package
 # that is not installed) must not abort the build under "set -e". The
 # redirection has to come before "|| true"; placed after it, it would apply
 # to "true" and leave yum's output unsilenced.
 yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
    avahi freetype bitstream-vera-fonts \
    ${PYTHON_COMPILE_DEPS} > /dev/null 2>&1 || true
 # Install the libraries that the manylinux1 profile allows
 yum -y install ${MANYLINUX1_DEPS}
 # Drop yum's caches, then print the resulting package list into the build log
 yum -y clean all > /dev/null 2>&1
 yum list installed
 # we don't need libpython*.a, and they're many megabytes
 find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
 # Strip what we can -- and ignore errors, because this just attempts to strip
 # *everything*, including non-ELF files:
 find /opt/_internal -type f -print0 \
    | xargs -0 -n1 strip --strip-unneeded 2>/dev/null || true
 # We do not need the Python test suites, or indeed the precompiled .pyc and
 # .pyo files. Partially cribbed from:
 #    https://github.com/docker-library/python/blob/master/3.4/slim/Dockerfile  # @lint-ignore
 # The outer \( ... \) is required: "-a" binds tighter than "-o", so without it
 # -print0 only applied to the last alternative and the test directories were
 # never listed. -depth lists a directory after its contents, so find never
 # descends into something that was already removed, and "rm -rf" is needed
 # because "rm -f" cannot remove directories.
 find /opt/_internal -depth \
     \( \
         \( -type d -a \( -name test -o -name tests \) \) \
      -o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
     \) -print0 | xargs -0 rm -rf
 # Run both check scripts with every installed interpreter. Each script exits
 # non-zero on failure, which aborts the build because of "set -ex" above.
 for PYTHON in /opt/python/*/bin/python; do
    # Smoke test to make sure that our Pythons work, and do indeed detect as
    # being manylinux compatible:
    $PYTHON $MY_DIR/manylinux1-check.py
    # Make sure that SSL cert checking works
    $PYTHON $MY_DIR/ssl-check.py
 done
 # Fix libc headers to remain compatible with C99 compilers.
 # (Rewrites every "extern inline" spelling under /usr/include in place so it
 # carries the __gnu_inline__ attribute.)
 find /usr/include/ -type f -exec sed -i 's/\bextern _*inline_*\b/extern __inline __attribute__ ((__gnu_inline__))/g' {} +
 # Now we can delete our built SSL
 rm -rf /usr/local/ssl
--- a/.ci/docker/manywheel/build_scripts/build_utils.sh
+++ b/.ci/docker/manywheel/build_scripts/build_utils.sh
@ -1,91 +0,0 @@
 #!/bin/bash
 # Helper utilities for build
 # Script used only in CD pipeline
 OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/  # @lint-ignore
 CURL_DOWNLOAD_URL=https://curl.se/download
 AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf
 function check_var {
    # Guard helper: succeed when the first argument is a non-empty string,
    # otherwise print a message and terminate the shell with status 1.
    [ -n "$1" ] && return 0
    echo "required variable not defined"
    exit 1
 }
 function do_openssl_build {
    # Configure, compile and install OpenSSL from the current directory as a
    # static, position-independent build under /usr/local/ssl. Standard output
    # of all three steps is discarded; standard error stays visible.
    {
        ./config no-ssl2 no-shared -fPIC --prefix=/usr/local/ssl
        make
        make install
    } > /dev/null
 }
 function check_sha256sum {
    # Verify that a file has the expected SHA-256 digest.
    #   $1  path of the file to check
    #   $2  expected digest (hex)
    # Returns the exit status of "sha256sum -c", so a mismatch is reported to
    # the caller even where errexit is off or ignored. Exits via check_var
    # when either argument is empty.
    local fname=$1
    check_var "${fname}"
    local sha256=$2
    check_var "${sha256}"
    # Feed the checksum line through stdin instead of a temporary
    # "<file>.sha256", so nothing is left behind when verification fails.
    echo "${sha256}  ${fname}" | sha256sum -c -
 }
 function build_openssl {
    # Download, verify, build and install one OpenSSL release.
    #   $1  release name, also the archive stem and unpacked directory name
    #   $2  expected sha256 of the .tar.gz archive
    local release=$1
    check_var ${release}
    local digest=$2
    check_var ${digest}
    check_var ${OPENSSL_DOWNLOAD_URL}
    local tarball=${release}.tar.gz
    curl -sLO ${OPENSSL_DOWNLOAD_URL}/${tarball}
    check_sha256sum ${tarball} ${digest}
    tar -xzf ${tarball}
    # Build inside a subshell so the caller's working directory is untouched.
    (cd ${release} && do_openssl_build)
    # Remove the unpacked sources and the archive once installed.
    rm -rf ${release} ${tarball}
 }
 function do_curl_build {
    # Configure, compile and install curl from the current directory as a
    # static build with SSL support; -ldl is passed to configure via LIBS.
    # Standard output of all three steps is discarded.
    {
        LIBS=-ldl ./configure --with-ssl --disable-shared
        make
        make install
    } > /dev/null
 }
 function build_curl {
    # Download, verify, build and install one curl release.
    #   $1  release name, also the archive stem and unpacked directory name
    #   $2  expected sha256 of the .tar.bz2 archive
    local release=$1
    check_var ${release}
    local digest=$2
    check_var ${digest}
    check_var ${CURL_DOWNLOAD_URL}
    local tarball=${release}.tar.bz2
    curl -sLO ${CURL_DOWNLOAD_URL}/${tarball}
    check_sha256sum ${tarball} ${digest}
    tar -jxf ${tarball}
    # Build inside a subshell so the caller's working directory is untouched.
    (cd ${release} && do_curl_build)
    # Remove the unpacked sources and the archive once installed.
    rm -rf ${release} ${tarball}
 }
 function do_standard_install {
    # Plain "configure, make, make install" in the current directory with
    # default options. Standard output of all three steps is discarded.
    {
        ./configure
        make
        make install
    } > /dev/null
 }
 function build_autoconf {
    # Download, verify, build and install one autoconf release.
    #   $1  release name, also the archive stem and unpacked directory name
    #   $2  expected sha256 of the .tar.gz archive
    local release=$1
    check_var ${release}
    local digest=$2
    check_var ${digest}
    check_var ${AUTOCONF_DOWNLOAD_URL}
    local tarball=${release}.tar.gz
    curl -sLO ${AUTOCONF_DOWNLOAD_URL}/${tarball}
    check_sha256sum ${tarball} ${digest}
    tar -zxf ${tarball}
    # Build inside a subshell so the caller's working directory is untouched.
    (cd ${release} && do_standard_install)
    # Remove the unpacked sources and the archive once installed.
    rm -rf ${release} ${tarball}
 }
--- a/.ci/docker/manywheel/build_scripts/manylinux1-check.py
+++ b/.ci/docker/manywheel/build_scripts/manylinux1-check.py
@ -1,60 +0,0 @@
 # Logic copied from PEP 513
 def is_manylinux1_compatible():
    """Return True when the running interpreter counts as manylinux1 compatible.

    The platform tag must be one of the accepted Linux tags. After that an
    installed ``_manylinux`` module decides if it defines
    ``manylinux1_compatible``; otherwise the glibc version heuristic is used.
    """
    # sysconfig.get_platform() replaces distutils.util.get_platform():
    # distutils was removed from the standard library in Python 3.12, which
    # made this check fail with ImportError on newer interpreters.
    import sysconfig

    # Only Linux, and only x86-64 / i686 / s390x
    if sysconfig.get_platform() not in ["linux-x86_64", "linux-i686", "linux-s390x"]:
        return False
    # Check for presence of _manylinux module
    try:
        import _manylinux
        return bool(_manylinux.manylinux1_compatible)
    except (ImportError, AttributeError):
        # Fall through to heuristic check below
        pass
    # Check glibc version. CentOS 5 uses glibc 2.5.
    return have_compatible_glibc(2, 5)
 def have_compatible_glibc(major, minimum_minor):
    """Report whether the process is linked against a suitable glibc.

    Args:
        major: required glibc major version; must match exactly.
        minimum_minor: lowest acceptable glibc minor version.

    Returns:
        True when glibc's reported version has the requested major version and
        a minor version of at least ``minimum_minor``. False when the process
        is not linked to glibc or the version string cannot be parsed.
    """
    import ctypes
    import re

    process_namespace = ctypes.CDLL(None)
    try:
        gnu_get_libc_version = process_namespace.gnu_get_libc_version
    except AttributeError:
        # Symbol doesn't exist -> therefore, we are not linked to
        # glibc.
        return False
    # Call gnu_get_libc_version, which returns a string like "2.5".
    gnu_get_libc_version.restype = ctypes.c_char_p
    version_str = gnu_get_libc_version()
    # py2 / py3 compatibility:
    if not isinstance(version_str, str):
        version_str = version_str.decode("ascii")
    # Only the leading "<major>.<minor>" is significant. Matching it with a
    # regex tolerates strings with extra components or vendor suffixes (for
    # example "2.20-2014.11"), which previously crashed the check.
    match = re.match(r"([0-9]+)\.([0-9]+)", version_str)
    if match is None:
        return False
    found_major = int(match.group(1))
    found_minor = int(match.group(2))
    return found_major == major and found_minor >= minimum_minor
 import sys

 # Report the verdict for this interpreter and encode it in the exit status:
 # 0 when manylinux1 compatible, 1 otherwise.
 if not is_manylinux1_compatible():
    print(f"{sys.executable} is NOT manylinux1 compatible")
    sys.exit(1)
 print(f"{sys.executable} is manylinux1 compatible")
 sys.exit(0)
--- a/.ci/docker/manywheel/build_scripts/ssl-check.py
+++ b/.ci/docker/manywheel/build_scripts/ssl-check.py
@ -1,26 +0,0 @@
 # cf. https://github.com/pypa/manylinux/issues/53
 # Smoke test for certificate verification: a request to a site with a valid
 # certificate must succeed and a request to a self-signed site must fail.
 import sys
 from urllib.request import urlopen

 GOOD_SSL = "https://google.com"
 BAD_SSL = "https://self-signed.badssl.com"
 print("Testing SSL certificate checking for Python:", sys.version)
 # Any OSError raised while opening the bad URL counts as the expected failure.
 EXC = OSError
 print(f"Connecting to {GOOD_SSL} should work")
 # Use the response as a context manager so the connection is closed instead
 # of being leaked; any exception here propagates and fails the check.
 with urlopen(GOOD_SSL):
    pass
 print("...it did, yay.")
 print(f"Connecting to {BAD_SSL} should fail")
 # Keep only the connection attempt inside the try block so that nothing else
 # can be mistaken for the expected failure.
 try:
    response = urlopen(BAD_SSL)
 except EXC:
    print("...it did, yay.")
 else:
    # If we get here then we failed:
    response.close()
    print("...it DIDN'T!!!!!11!!1one!")
    sys.exit(1)
--- a/Show More
+++ b/Show More