From c5de535bc0b785abbacfebddf660af4cd3b2a6a1 Mon Sep 17 00:00:00 2001 From: atalman Date: Wed, 19 Oct 2022 21:26:53 +0000 Subject: [PATCH] Advance nightly docker to 11.6 (#86941) Fixes following: https://github.com/pytorch/pytorch/actions/runs/3242695506/jobs/5316334351 crash in Docker builds introduced by: #82682 The PR seems to introduce some changes not compatible with cuda 11.3 which is used by our Docker builds Pull Request resolved: https://github.com/pytorch/pytorch/pull/86941 Approved by: https://github.com/malfet --- Review note: CUDA_VERSION_SHORT is now derived with `cut -d. -f1-2`; the previous expression "${CUDA_VERSION:0:2}.${CUDA_VERSION:4:1}" read offset 4, which is the second "." of 11.6.2, and so produced "11.." instead of "11.6". .github/scripts/build_publish_nightly_docker.sh | 3 ++- Dockerfile | 7 ++++--- docker.Makefile | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/scripts/build_publish_nightly_docker.sh b/.github/scripts/build_publish_nightly_docker.sh index db84704aa3e4..d653621ef933 100644 --- a/.github/scripts/build_publish_nightly_docker.sh +++ b/.github/scripts/build_publish_nightly_docker.sh @@ -3,13 +3,14 @@ set -xeuo pipefail PYTORCH_DOCKER_TAG=$(git describe --tags --always)-devel -CUDA_VERSION=11.3.1 +CUDA_VERSION=11.6.2 # Build PyTorch nightly docker make -f docker.Makefile \ DOCKER_REGISTRY=ghcr.io \ DOCKER_ORG=pytorch \ CUDA_VERSION=${CUDA_VERSION} \ + CUDA_VERSION_SHORT="$(cut -d. -f1-2 <<<"${CUDA_VERSION}")" \ DOCKER_IMAGE=pytorch-nightly \ DOCKER_TAG=${PYTORCH_DOCKER_TAG} \ INSTALL_CHANNEL=pytorch-nightly BUILD_TYPE=official devel-image diff --git a/Dockerfile b/Dockerfile index 815a9108ce94..e49e0a44e816 100644 --- a/Dockerfile +++ b/Dockerfile @@ -59,17 +59,18 @@ RUN --mount=type=cache,target=/opt/ccache \ FROM conda as conda-installs ARG PYTHON_VERSION=3.8 -ARG CUDA_VERSION=11.3 +ARG CUDA_VERSION=11.6 ARG CUDA_CHANNEL=nvidia ARG INSTALL_CHANNEL=pytorch-nightly -ENV CONDA_OVERRIDE_CUDA=${CUDA_VERSION} # Automatically set by buildx +RUN /opt/conda/bin/conda update -y conda RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y python=${PYTHON_VERSION} ARG TARGETPLATFORM + # On arm64 we can only install wheel 
packages RUN case ${TARGETPLATFORM} in \ "linux/arm64") pip install --extra-index-url https://download.pytorch.org/whl/cpu/ torch torchvision torchtext ;; \ - *) /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch torchvision torchtext "cudatoolkit=${CUDA_VERSION}" ;; \ + *) /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch torchvision torchtext "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \ esac && \ /opt/conda/bin/conda clean -ya RUN /opt/conda/bin/pip install torchelastic diff --git a/docker.Makefile b/docker.Makefile index 0768f6ecf6ed..fe3691b979ea 100644 --- a/docker.Makefile +++ b/docker.Makefile @@ -8,7 +8,7 @@ $(warning WARNING: No docker user found using results from whoami) DOCKER_ORG = $(shell whoami) endif -CUDA_VERSION = 11.3.1 +CUDA_VERSION = 11.6.2 CUDNN_VERSION = 8 BASE_RUNTIME = ubuntu:18.04 BASE_DEVEL = nvidia/cuda:$(CUDA_VERSION)-cudnn$(CUDNN_VERSION)-devel-ubuntu18.04 @@ -18,7 +18,7 @@ CUDA_CHANNEL = nvidia # The conda channel to use to install pytorch / torchvision INSTALL_CHANNEL ?= pytorch -PYTHON_VERSION ?= 3.8 +PYTHON_VERSION ?= 3.9 PYTORCH_VERSION ?= $(shell git describe --tags --always) # Can be either official / dev BUILD_TYPE ?= dev