Files
pytorch/Dockerfile
Nikita Shulga 4f18739bf0 Fix Docker image generation (#88741)
Pass install channel when building nightly images
Pass `TRITON_VERSION` argument to install triton for nightly images

Fix `generate_pytorch_version.py` to work with unannotated tags and avoid failures like the following:
```
% git checkout nightly
% ./.github/scripts/generate_pytorch_version.py

fatal: No annotated tags can describe '93f15b1b54ca5fb4a7ca9c21a813b4b86ebaeafa'.
However, there were unannotated tags: try --tags.
Traceback (most recent call last):
  File "/Users/nshulga/git/pytorch/pytorch-release/./.github/scripts/generate_pytorch_version.py", line 120, in <module>
    main()
  File "/Users/nshulga/git/pytorch/pytorch-release/./.github/scripts/generate_pytorch_version.py", line 115, in main
    print(version_obj.get_release_version())
  File "/Users/nshulga/git/pytorch/pytorch-release/./.github/scripts/generate_pytorch_version.py", line 75, in get_release_version
    if not get_tag():
  File "/Users/nshulga/git/pytorch/pytorch-release/./.github/scripts/generate_pytorch_version.py", line 37, in get_tag
    dirty_tag = subprocess.check_output(
  File "/Users/nshulga/miniforge3/lib/python3.9/subprocess.py", line 424, in check_output
    return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
  File "/Users/nshulga/miniforge3/lib/python3.9/subprocess.py", line 528, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['git', 'describe']' returned non-zero exit status 128.
```
After the change, nightly is reported as follows (due to an autolabelling issue,
which should be fixed by https://github.com/pytorch/test-infra/pull/1047):
```
 % ./.github/scripts/generate_pytorch_version.py
ciflow/inductor/26921+cpu
```

Even for tagged release commits version generation was wrong:
```
% git checkout release/1.13
% ./.github/scripts/generate_pytorch_version.py
ciflow/periodic/79617-4848-g7c98e70d44+cpu
```
After the fix, it is as expected:
```
% ./.github/scripts/generate_pytorch_version.py
1.13.0+cpu
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/88741
Approved by: https://github.com/dagitses, https://github.com/msaroufim
2022-11-10 00:06:31 +00:00

99 lines
3.7 KiB
Docker

# syntax=docker/dockerfile:1
#
# NOTE: This file uses BuildKit-only features (e.g. `RUN --mount=type=cache`),
# so it must be built with BuildKit: Docker 18.09 or newer, with
# DOCKER_BUILDKIT=1 set wherever BuildKit is not already the default builder.
#
# If you do not use buildkit you are not going to have a good time
#
# For reference:
#    https://docs.docker.com/build/buildkit/

# Base image used by the `dev-base` and `official` stages; override with
# `--build-arg BASE_IMAGE=...`.
ARG BASE_IMAGE=ubuntu:18.04
# Declared before the first FROM, so only FROM lines can read this value;
# stages that need it declare `ARG PYTHON_VERSION` again themselves.
ARG PYTHON_VERSION=3.8
FROM ${BASE_IMAGE} AS dev-base
# Compiler toolchain, download/VCS tools and JPEG/PNG development headers
# shared by every stage derived from dev-base. The apt package lists are
# removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        libjpeg-dev \
        libpng-dev \
    && rm -rf /var/lib/apt/lists/*
# Run the ccache symlink updater, then point ccache's cache at /opt/ccache.
RUN /usr/sbin/update-ccache-symlinks
RUN mkdir /opt/ccache \
    && ccache --set-config=cache_dir=/opt/ccache
# Put the conda installation (created under /opt/conda by the `conda` stage)
# first on PATH for all derived stages.
ENV PATH=/opt/conda/bin:$PATH
FROM dev-base AS conda
ARG PYTHON_VERSION=3.8
# Automatically set by buildx
ARG TARGETPLATFORM
# Translate Docker's TARGETPLATFORM into the architecture suffix used in the
# Miniconda installer file name (anything other than linux/arm64 falls back to
# x86_64), then download the installer to ~/miniconda.sh.
# Only `-o` is passed to curl: a second output option (`-O`) for the same
# single URL has nothing to pair with and is just noise.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") MINICONDA_ARCH=aarch64 ;; \
        *) MINICONDA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -v -o ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh"
COPY requirements.txt .
# Install Miniconda into /opt/conda, add the requested Python plus build
# helpers, install the Python requirements, and clean conda's caches in the
# same layer. The installer script is deleted once it has run.
RUN chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
    /opt/conda/bin/python -mpip install --no-cache-dir -r requirements.txt && \
    /opt/conda/bin/conda clean -ya
FROM dev-base AS submodule-update
# Copy the whole build context into /opt/pytorch and fetch every git
# submodule (recursively) so the `build` stage gets a complete source tree.
WORKDIR /opt/pytorch
COPY . .
RUN git submodule update --init --recursive --jobs 0
FROM conda AS build
WORKDIR /opt/pytorch
# /opt/conda is already present because this stage starts FROM conda, so it is
# not copied again; only the source tree (with submodules) is pulled in.
COPY --from=submodule-update /opt/pytorch /opt/pytorch
# /opt/ccache (the cache_dir configured in dev-base) is a BuildKit cache mount:
# it is reused across builds and is not written into the image layer.
# CMAKE_PREFIX_PATH resolves to the conda installation root (the parent of the
# directory that contains the `conda` executable).
RUN --mount=type=cache,target=/opt/ccache \
    TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0" \
    TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
    CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
    python setup.py install
FROM conda AS conda-installs
ARG PYTHON_VERSION=3.8
ARG CUDA_VERSION=11.6
ARG CUDA_CHANNEL=nvidia
# Conda channel the PyTorch packages are installed from.
ARG INSTALL_CHANNEL=pytorch-nightly
RUN /opt/conda/bin/conda update -y conda
RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y python=${PYTHON_VERSION}
# Automatically set by buildx
ARG TARGETPLATFORM
# Optional: torchtriton is installed only when this is non-empty (see the
# final RUN of this stage).
ARG TRITON_VERSION
# On arm64 we can only install wheel packages; every other platform installs
# the conda packages together with the pytorch-cuda build matching the
# major.minor part of CUDA_VERSION.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") /opt/conda/bin/pip install --extra-index-url https://download.pytorch.org/whl/cpu/ torch torchvision torchtext ;; \
        *) /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch torchvision torchtext "pytorch-cuda=$(echo "${CUDA_VERSION}" | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya
RUN /opt/conda/bin/pip install torchelastic
# Install torchtriton only when a version was requested and the target is not
# arm64. Two separate tests joined with && replace the obsolescent `test -a`.
RUN if [ -n "${TRITON_VERSION}" ] && [ "${TARGETPLATFORM}" != "linux/arm64" ]; then \
        /opt/conda/bin/pip install "torchtriton==${TRITON_VERSION}" --extra-index-url https://download.pytorch.org/whl/nightly/cpu ; \
    fi
FROM ${BASE_IMAGE} AS official
ARG PYTORCH_VERSION
LABEL com.nvidia.volumes.needed="nvidia_driver"
# Runtime-only system packages; the apt package lists are removed in the same
# layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libjpeg-dev \
        libpng-dev \
    && rm -rf /var/lib/apt/lists/*
# Bring in the conda tree with the prebuilt packages from `conda-installs`.
COPY --from=conda-installs /opt/conda /opt/conda
ENV PATH=/opt/conda/bin:$PATH
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
# Expose the build-time PYTORCH_VERSION argument in the runtime environment.
ENV PYTORCH_VERSION=${PYTORCH_VERSION}
WORKDIR /workspace
FROM official AS dev
# Overlay the conda tree from the `build` stage (where `python setup.py install`
# ran) on top of the one inherited from `official`, so the source-built
# installation should take precedence over the prebuilt packages.
COPY --from=build /opt/conda /opt/conda