[ROCm] build magma rocm and upload tarball (#149902)

This will improve docker image build times by not having to rebuild magma rocm for unrelated changes.  This PR is step 1 of 2.  The next step is a second PR to modify the docker image builds to use the magma tarball that this PR will produce.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/149902
Approved by: https://github.com/malfet
This commit is contained in:
Jeff Daily
2025-03-25 20:20:36 +00:00
committed by PyTorch MergeBot
parent d1ff3ff675
commit bf8f4efd31
6 changed files with 234 additions and 0 deletions

2
.ci/magma-rocm/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
output/
magma-rocm*/

35
.ci/magma-rocm/Makefile Normal file
View File

@ -0,0 +1,35 @@
SHELL=/usr/bin/env bash
DOCKER_CMD ?= docker
DESIRED_ROCM ?= 6.3
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
PACKAGE_NAME = magma-rocm
# inherit this from underlying docker image, do not pass this env var to docker
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
-w /builder \
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_ROCM_SHORT} \
-e DESIRED_ROCM=${DESIRED_ROCM} \
"pytorch/manylinux2_28-builder:rocm${DESIRED_ROCM}-main" \
magma-rocm/build_magma.sh
.PHONY: all
all: magma-rocm63
all: magma-rocm624
.PHONY:
clean:
$(RM) -r magma-*
$(RM) -r output
.PHONY: magma-rocm63
magma-rocm63: DESIRED_ROCM := 6.3
magma-rocm63:
$(DOCKER_RUN)
.PHONY: magma-rocm624
magma-rocm624: DESIRED_ROCM := 6.2.4
magma-rocm624:
$(DOCKER_RUN)

48
.ci/magma-rocm/README.md Normal file
View File

@ -0,0 +1,48 @@
# Magma ROCm
This folder contains the scripts and configurations to build libmagma.so, linked for various versions of ROCm.
## Building
Look in the `Makefile` for available targets to build. To build any target, for example `magma-rocm63`, run
```
# Using `docker`
make magma-rocm63
# Using `podman`
DOCKER_CMD=podman make magma-rocm63
```
This spawns a `pytorch/manylinux-rocm<version>` docker image, which has the required `devtoolset` and ROCm versions installed.
Within the docker image, it runs `build_magma.sh` with the correct environment variables set, which package the necessary files
into a tarball, with the following structure:
```
.
├── include # header files
├── lib # libmagma.so
├── info
│ ├── licenses # license file
│ └── recipe # build script
```
More specifically, `build_magma.sh` copies over the relevant files from the `package_files` directory depending on the ROCm version.
Outputted binaries should be in the `output` folder.
## Pushing
Packages can be uploaded to an S3 bucket using:
```
aws s3 cp output/*/magma-cuda*.bz2 <bucket-with-path>
```
If you do not have upload permissions, please ping @seemethere or @soumith to gain access
## New versions
New ROCm versions can be added by creating a new make target with the next desired version. For ROCm version N.n, the target should be named `magma-rocmNn`.
Make sure to edit the appropriate environment variables (e.g., DESIRED_ROCM) in the `Makefile` accordingly. Remember also to check `build_magma.sh` to ensure the logic for copying over the files remains correct.

42
.ci/magma-rocm/build_magma.sh Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/env bash
set -eou pipefail
# Environment variables
# The script expects DESIRED_CUDA and PACKAGE_NAME to be set
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Version 2.7.2 + ROCm related updates
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
# Folders for the build
PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
PACKAGE_DIR=${ROOT_DIR}/magma-rocm/${PACKAGE_NAME} # build workspace
PACKAGE_OUTPUT=${ROOT_DIR}/magma-rocm/output # where tarballs are stored
PACKAGE_BUILD=${PACKAGE_DIR} # where the content of the tarball is prepared
PACKAGE_RECIPE=${PACKAGE_BUILD}/info/recipe
PACKAGE_LICENSE=${PACKAGE_BUILD}/info/licenses
mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RECIPE} ${PACKAGE_LICENSE}
# Fetch magma sources and verify checksum
pushd ${PACKAGE_DIR}
git clone https://bitbucket.org/icl/magma.git
pushd magma
git checkout ${MAGMA_VERSION}
popd
popd
# build
pushd ${PACKAGE_DIR}/magma
# The build.sh script expects to be executed from the sources root folder
INSTALL_DIR=${PACKAGE_BUILD} ${PACKAGE_FILES}/build.sh
popd
# Package recipe, license and tarball
# Folder and package name are backward compatible for the build workflow
cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh
cp ${PACKAGE_DIR}/magma/COPYRIGHT ${PACKAGE_LICENSE}/COPYRIGHT
pushd ${PACKAGE_BUILD}
tar cjf ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 include lib info
echo Built in ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2
popd

View File

@ -0,0 +1,38 @@
# Magma build scripts need `python`
ln -sf /usr/bin/python3 /usr/bin/python
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
almalinux)
yum install -y gcc-gfortran
;;
*)
echo "No preinstalls to build magma..."
;;
esac
MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then
echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc
fi
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
export PATH="${PATH}:/opt/rocm/bin"
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
else
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
done
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
make -f make.gen.hipMAGMA -j $(nproc)
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
cp -R lib ${INSTALL_DIR}
cp -R include ${INSTALL_DIR}

View File

@ -0,0 +1,69 @@
name: build-linux-magma-rocm
on:
push:
branches:
main
paths:
- .ci/magma-rocm/*
- .ci/magma-rocm/package_files/*
- .github/workflows/build-magma-rocm-linux.yml
pull_request:
paths:
- .ci/magma-rocm/*
- .ci/magma-rocm/package_files/*
- .github/workflows/build-magma-rocm-linux.yml
defaults:
run:
shell: bash -x -e -l {0}
env:
BUILD_ENVIRONMENT: build-linux-magma-rocm
IN_CI: 1
IS_GHA: 1
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
build-linux-magma-rocm:
if: github.repository_owner == 'pytorch'
runs-on: linux.12xlarge
permissions:
id-token: write
strategy:
matrix:
rocm_version: ["63", "624"]
steps:
- name: Checkout PyTorch
uses: actions/checkout@v4
- name: Build Magma Rocm
working-directory: .ci/magma-rocm
run: |
# Produces artifacts under magma-rocm/output/linux-64/magma-rocm*.bz2
make magma-rocm${{ matrix.rocm_version }}
- name: Save as artifact
uses: actions/upload-artifact@v4
with:
path: .ci/magma-rocm/output/linux-64/magma-rocm*.bz2
name: artifact_${{ matrix.rocm_version }}
- name: Configure AWS credentials(PyTorch account)
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_ossci_linux_windows_read_write
aws-region: us-east-1
- name: Set DRY_RUN
if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' }}
run: |
echo "DRY_RUN=disabled" >> "$GITHUB_ENV"
- name: Upload binaries
shell: bash
env:
PKG_DIR: ".ci/magma-rocm/output/linux-64/"
TARGET_OS: "linux"
PKG_INCLUDE: "magma-rocm*.tar.bz2"
run: |
set -ex
bash .github/scripts/upload_aws_ossci.sh