diff --git a/.circleci/scripts/binary_checkout.sh b/.circleci/scripts/binary_checkout.sh deleted file mode 100755 index 7bcf0b7b6431..000000000000 --- a/.circleci/scripts/binary_checkout.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -set -eux -o pipefail - -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - - -# This step runs on multiple executors with different envfile locations -if [[ "$(uname)" == Darwin ]]; then - # macos executor (builds and tests) - workdir="/Users/distiller/project" -elif [[ "$OSTYPE" == "msys" ]]; then - # windows executor (builds and tests) - rm -rf /c/w - ln -s "/c/Users/circleci/project" /c/w - workdir="/c/w" -elif [[ -d "/home/circleci/project" ]]; then - # machine executor (binary tests) - workdir="/home/circleci/project" -else - # docker executor (binary builds) - workdir="/" -fi - -# It is very important that this stays in sync with binary_populate_env.sh -if [[ "$OSTYPE" == "msys" ]]; then - # We need to make the paths as short as possible on Windows - export PYTORCH_ROOT="$workdir/p" - export BUILDER_ROOT="$workdir/b" -else - export PYTORCH_ROOT="$workdir/pytorch" - export BUILDER_ROOT="$workdir/builder" -fi - -# Try to extract PR number from branch if not already set -if [[ -z "${CIRCLE_PR_NUMBER:-}" ]]; then - CIRCLE_PR_NUMBER="$(echo ${CIRCLE_BRANCH} | sed -E -n 's/pull\/([0-9]*).*/\1/p')" -fi - -# Clone the Pytorch branch -retry git clone https://github.com/pytorch/pytorch.git "$PYTORCH_ROOT" -pushd "$PYTORCH_ROOT" -if [[ -n "${CIRCLE_PR_NUMBER:-}" ]]; then - # "smoke" binary build on PRs - git fetch --force origin "pull/${CIRCLE_PR_NUMBER}/head:remotes/origin/pull/${CIRCLE_PR_NUMBER}" - git reset --hard "$CIRCLE_SHA1" - git checkout -q -B "$CIRCLE_BRANCH" - git reset --hard "$CIRCLE_SHA1" -elif [[ -n "${CIRCLE_SHA1:-}" ]]; then - # Scheduled workflows & "smoke" binary build on trunk on PR merges - DEFAULT_BRANCH="$(git remote show $CIRCLE_REPOSITORY_URL | awk '/HEAD branch/ {print $NF}')" - git reset --hard "$CIRCLE_SHA1" - git checkout -q -B $DEFAULT_BRANCH -else - echo "Can't tell what to checkout" - exit 1 -fi -retry git submodule update --init --recursive -echo "Using Pytorch from " -git --no-pager log --max-count 1 -popd - -# Clone the Builder main repo -retry git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT" -pushd "$BUILDER_ROOT" -echo "Using builder from " -git --no-pager log --max-count 1 -popd diff --git a/.circleci/scripts/binary_install_miniconda.sh b/.circleci/scripts/binary_install_miniconda.sh deleted file mode 100755 index ce08805bd5b0..000000000000 --- a/.circleci/scripts/binary_install_miniconda.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -set -eux -o pipefail - -# This step runs on multiple executors with different envfile locations -if [[ "$(uname)" == Darwin ]]; then - envfile="/Users/distiller/project/env" -elif [[ -d "/home/circleci/project" ]]; then - # machine executor (binary tests) - envfile="/home/circleci/project/env" -else - # docker executor (binary builds) - envfile="/env" -fi - -# TODO this is super hacky and ugly. Basically, the binary_update_html job does -# not have an env file, since it does not call binary_populate_env.sh, since it -# does not have a BUILD_ENVIRONMENT. So for this one case, which we detect by a -# lack of an env file, we manually export the environment variables that we -# need to install miniconda -if [[ ! -f "$envfile" ]]; then - MINICONDA_ROOT="/home/circleci/project/miniconda" - workdir="/home/circleci/project" - retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) - } - export -f retry -else - source "$envfile" -fi - -conda_sh="$workdir/install_miniconda.sh" -if [[ "$(uname)" == Darwin ]]; then - curl --retry 3 --retry-all-errors -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-py39_4.12.0-MacOSX-x86_64.sh -else - curl --retry 3 --retry-all-errors -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -fi -chmod +x "$conda_sh" -"$conda_sh" -b -p "$MINICONDA_ROOT" -rm -f "$conda_sh" - -# We can't actually add miniconda to the PATH in the envfile, because that -# breaks 'unbuffer' in Mac jobs. This is probably because conda comes with -# a tclsh, which then gets inserted before the tclsh needed in /usr/bin diff --git a/.circleci/scripts/binary_macos_build.sh b/.circleci/scripts/binary_macos_build.sh index 8ee131de0435..3f9e6e8eb515 100755 --- a/.circleci/scripts/binary_macos_build.sh +++ b/.circleci/scripts/binary_macos_build.sh @@ -4,10 +4,6 @@ set -eux -o pipefail source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" -if [[ -z "${GITHUB_ACTIONS:-}" ]]; then - export PATH="${workdir:-${HOME}}/miniconda/bin:${PATH}" -fi - # Build export USE_PYTORCH_METAL_EXPORT=1 export USE_COREML_DELEGATE=1 diff --git a/.circleci/scripts/binary_populate_env.sh b/.circleci/scripts/binary_populate_env.sh index 668de45e2c7b..287423641d77 100755 --- a/.circleci/scripts/binary_populate_env.sh +++ b/.circleci/scripts/binary_populate_env.sh @@ -3,17 +3,9 @@ set -eux -o pipefail export TZ=UTC tagged_version() { - # Grabs version from either the env variable CIRCLE_TAG - # or the pytorch git described version - if [[ "$OSTYPE" == "msys" && -z "${GITHUB_ACTIONS:-}" ]]; then - GIT_DIR="${workdir}/p/.git" - else - GIT_DIR="${workdir}/pytorch/.git" - fi + GIT_DIR="${workdir}/pytorch/.git" GIT_DESCRIBE="git --git-dir ${GIT_DIR} describe --tags --match v[0-9]*.[0-9]*.[0-9]*" - if [[ -n "${CIRCLE_TAG:-}" ]]; then - echo "${CIRCLE_TAG}" - elif [[ ! -d "${GIT_DIR}" ]]; then + if [[ ! -d "${GIT_DIR}" ]]; then echo "Abort, abort! Git dir ${GIT_DIR} does not exists!" kill $$ elif ${GIT_DESCRIBE} --exact >/dev/null; then @@ -59,6 +51,7 @@ PIP_UPLOAD_FOLDER='nightly/' # We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it export DATE="$(date -u +%Y%m%d)" BASE_BUILD_VERSION="$(cat ${PYTORCH_ROOT}/version.txt|cut -da -f1).dev${DATE}" + # Change BASE_BUILD_VERSION to git tag when on a git tag # Use 'git -C' to make doubly sure we're in the correct directory for checking # the git tag @@ -78,6 +71,35 @@ fi export PYTORCH_BUILD_NUMBER=1 +# Set triton version as part of PYTORCH_EXTRA_INSTALL_REQUIREMENTS +TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) + +# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT +if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then + # Only linux Python < 3.12 are supported wheels for triton + TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64' and python_version < '3.12'" + TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}" + if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then + TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt) + TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}" + fi + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" +fi + +# Set triton via PYTORCH_EXTRA_INSTALL_REQUIREMENTS for triton rocm package +if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" && "$DESIRED_PYTHON" != "3.12" ]]; then + TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}" + if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then + TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-rocm.txt) + TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}" + fi + if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}" + else + export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}" + fi +fi + JAVA_HOME= BUILD_JNI=OFF if [[ "$PACKAGE_TYPE" == libtorch ]]; then @@ -123,12 +145,13 @@ if [[ "${OSTYPE}" == "msys" ]]; then else export DESIRED_DEVTOOLSET="${DESIRED_DEVTOOLSET:-}" fi -export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" + export DATE="$DATE" export NIGHTLIES_DATE_PREAMBLE=1.14.0.dev export PYTORCH_BUILD_VERSION="$PYTORCH_BUILD_VERSION" export PYTORCH_BUILD_NUMBER="$PYTORCH_BUILD_NUMBER" export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION" +export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" # TODO: We don't need this anymore IIUC export TORCH_PACKAGE_NAME='torch' @@ -161,28 +184,6 @@ if [[ "$(uname)" != Darwin ]]; then EOL fi -if [[ -z "${GITHUB_ACTIONS:-}" ]]; then - cat >>"$envfile" <> "$envfile" echo ' $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)' >> "$envfile" echo '}' >> "$envfile" diff --git a/.circleci/scripts/binary_run_in_docker.sh b/.circleci/scripts/binary_run_in_docker.sh deleted file mode 100755 index 4af14becb426..000000000000 --- a/.circleci/scripts/binary_run_in_docker.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# This section is used in the binary_test and smoke_test jobs. It expects -# 'binary_populate_env' to have populated /home/circleci/project/env and it -# expects another section to populate /home/circleci/project/ci_test_script.sh -# with the code to run in the docker - -# Expect all needed environment variables to be written to this file -source /home/circleci/project/env -echo "Running the following code in Docker" -cat /home/circleci/project/ci_test_script.sh -echo -echo -set -eux -o pipefail - -# Expect actual code to be written to this file -chmod +x /home/circleci/project/ci_test_script.sh - -VOLUME_MOUNTS="-v /home/circleci/project/:/circleci_stuff -v /home/circleci/project/final_pkgs:/final_pkgs -v ${PYTORCH_ROOT}:/pytorch -v ${BUILDER_ROOT}:/builder" -# Run the docker -if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --gpus all ${VOLUME_MOUNTS} -t -d "${DOCKER_IMAGE}") -else - export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined ${VOLUME_MOUNTS} -t -d "${DOCKER_IMAGE}") -fi - -# Execute the test script that was populated by an earlier section -export COMMAND='((echo "source /circleci_stuff/env && /circleci_stuff/ci_test_script.sh") | docker exec -i "$id" bash) 2>&1' -echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts diff --git a/.circleci/scripts/setup_ci_environment.sh b/.circleci/scripts/setup_ci_environment.sh deleted file mode 100755 index 42a605cd4445..000000000000 --- a/.circleci/scripts/setup_ci_environment.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env bash -set -ex -o pipefail - -# Remove unnecessary sources -sudo rm -f /etc/apt/sources.list.d/google-chrome.list -sudo rm -f /etc/apt/heroku.list -sudo rm -f /etc/apt/openjdk-r-ubuntu-ppa-xenial.list -sudo rm -f /etc/apt/partner.list - -# To increase the network reliability, let apt decide which mirror is best to use -sudo sed -i -e 's/http:\/\/.*archive/mirror:\/\/mirrors/' -e 's/\/ubuntu\//\/mirrors.txt/' /etc/apt/sources.list - -retry () { - $* || $* || $* || $* || $* -} - -# Method adapted from here: https://askubuntu.com/questions/875213/apt-get-to-retry-downloading -# (with use of tee to avoid permissions problems) -# This is better than retrying the whole apt-get command -echo "APT::Acquire::Retries \"3\";" | sudo tee /etc/apt/apt.conf.d/80-retries - -retry sudo apt-get update -qq -retry sudo apt-get -y install \ - moreutils \ - expect-dev - -echo "== DOCKER VERSION ==" -docker version - -if ! command -v aws >/dev/null; then - retry sudo pip3 -q install awscli==1.19.64 -fi - -if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then - DRIVER_FN="NVIDIA-Linux-x86_64-515.76.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - # Taken directly from https://github.com/NVIDIA/nvidia-docker - # Add the package repositories - distribution=$(. /etc/os-release;echo "$ID$VERSION_ID") - curl -s -L --retry 3 --retry-all-errors https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L --retry 3 --retry-all-errors "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - retry sudo apt-get update -qq - # Necessary to get the `--gpus` flag to function within docker - retry sudo apt-get install -y nvidia-container-toolkit - sudo systemctl restart docker -else - # Explicitly remove nvidia docker apt repositories if not building for cuda - sudo rm -rf /etc/apt/sources.list.d/nvidia-docker.list -fi - -add_to_env_file() { - local name=$1 - local value=$2 - case "$value" in - *\ *) - # BASH_ENV should be set by CircleCI - echo "${name}='${value}'" >> "${BASH_ENV:-/tmp/env}" - ;; - *) - echo "${name}=${value}" >> "${BASH_ENV:-/tmp/env}" - ;; - esac -} - -add_to_env_file CI_MASTER "${CI_MASTER:-}" -add_to_env_file COMMIT_SOURCE "${CIRCLE_BRANCH:-}" -add_to_env_file BUILD_ENVIRONMENT "${BUILD_ENVIRONMENT}" -add_to_env_file CIRCLE_PULL_REQUEST "${CIRCLE_PULL_REQUEST}" - - -if [[ "${BUILD_ENVIRONMENT}" == *-build ]]; then - add_to_env_file SCCACHE_BUCKET ossci-compiler-cache-circleci-v2 - - SCCACHE_MAX_JOBS=$(( $(nproc) - 1 )) - MEMORY_LIMIT_MAX_JOBS=8 # the "large" resource class on CircleCI has 32 CPU cores, if we use all of them we'll OOM - MAX_JOBS=$(( ${SCCACHE_MAX_JOBS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${SCCACHE_MAX_JOBS} )) - add_to_env_file MAX_JOBS "${MAX_JOBS}" - - if [ -n "${USE_CUDA_DOCKER_RUNTIME:-}" ]; then - add_to_env_file TORCH_CUDA_ARCH_LIST 5.2 - fi - - if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then - # This IAM user allows write access to S3 bucket for sccache & bazels3cache - set +x - add_to_env_file XLA_CLANG_CACHE_S3_BUCKET_NAME "${XLA_CLANG_CACHE_S3_BUCKET_NAME:-}" - add_to_env_file AWS_ACCESS_KEY_ID "${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V2:-}" - add_to_env_file AWS_SECRET_ACCESS_KEY "${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V2:-}" - set -x - else - # This IAM user allows write access to S3 bucket for sccache - set +x - add_to_env_file XLA_CLANG_CACHE_S3_BUCKET_NAME "${XLA_CLANG_CACHE_S3_BUCKET_NAME:-}" - add_to_env_file AWS_ACCESS_KEY_ID "${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}" - add_to_env_file AWS_SECRET_ACCESS_KEY "${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V4:-}" - set -x - fi -fi - -# This IAM user only allows read-write access to ECR -set +x -export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_WRITE_V4:-} -export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_WRITE_V4:-} -export AWS_ACCOUNT_ID=$(aws sts get-caller-identity|grep Account|cut -f4 -d\") -export AWS_REGION=us-east-1 -aws ecr get-login-password --region $AWS_REGION|docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com -set -x diff --git a/.circleci/scripts/setup_linux_system_environment.sh b/.circleci/scripts/setup_linux_system_environment.sh deleted file mode 100755 index 780f7c1bd379..000000000000 --- a/.circleci/scripts/setup_linux_system_environment.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -set -eux -o pipefail - -# Set up CircleCI GPG keys for apt, if needed -curl --retry 3 --retry-all-errors -s -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - -# Stop background apt updates. Hypothetically, the kill should not -# be necessary, because stop is supposed to send a kill signal to -# the process, but we've added it for good luck. Also -# hypothetically, it's supposed to be unnecessary to wait for -# the process to block. We also have that line for good luck. -# If you like, try deleting them and seeing if it works. -sudo systemctl stop apt-daily.service || true -sudo systemctl kill --kill-who=all apt-daily.service || true - -sudo systemctl stop unattended-upgrades.service || true -sudo systemctl kill --kill-who=all unattended-upgrades.service || true - -# wait until `apt-get update` has been killed -while systemctl is-active --quiet apt-daily.service -do - sleep 1; -done -while systemctl is-active --quiet unattended-upgrades.service -do - sleep 1; -done - -# See if we actually were successful -systemctl list-units --all | cat - -# For good luck, try even harder to kill apt-get -sudo pkill apt-get || true - -# For even better luck, purge unattended-upgrades -sudo apt-get purge -y unattended-upgrades || true - -cat /etc/apt/sources.list - -# For the bestest luck, kill again now -sudo pkill apt || true -sudo pkill dpkg || true - -# Try to detect if apt/dpkg is stuck -if ps auxfww | grep '[a]pt'; then - echo "WARNING: There are leftover apt processes; subsequent apt update will likely fail" -fi -if ps auxfww | grep '[d]pkg'; then - echo "WARNING: There are leftover dpkg processes; subsequent apt update will likely fail" -fi