mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Related to: https://github.com/pytorch/pytorch/issues/125879 Would check if we are compiled with CUDA before publishing CUDA Docker nightly image Test ``` #18 [conda-installs 5/5] RUN IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())'); echo "Is torch compiled with cuda: ${IS_CUDA}"; if test "${IS_CUDA}" != "True" -a ! -z "12.4.0"; then exit 1; fi #18 1.656 Is torch compiled with cuda: False #18 ERROR: process "/bin/sh -c IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())'); echo \"Is torch compiled with cuda: ${IS_CUDA}\"; if test \"${IS_CUDA}\" != \"True\" -a ! -z \"${CUDA_VERSION}\"; then \texit 1; fi" did not complete successfully: exit code: 1 ------ > [conda-installs 5/5] RUN IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())'); echo "Is torch compiled with cuda: ${IS_CUDA}"; if test "${IS_CUDA}" != "True" -a ! -z "12.4.0"; then exit 1; fi: 1.656 Is torch compiled with cuda: False ------ Dockerfile:80 -------------------- 79 | RUN /opt/conda/bin/pip install torchelastic 80 | >>> RUN IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())');\ 81 | >>> echo "Is torch compiled with cuda: ${IS_CUDA}"; \ 82 | >>> if test "${IS_CUDA}" != "True" -a ! -z "${CUDA_VERSION}"; then \ 83 | >>> exit 1; \ 84 | >>> fi 85 | -------------------- ERROR: failed to solve: process "/bin/sh -c IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())'); echo \"Is torch compiled with cuda: ${IS_CUDA}\"; if test \"${IS_CUDA}\" != \"True\" -a ! 
-z \"${CUDA_VERSION}\"; then \texit 1; fi" did not complete successfully: exit code: 1 (base) [ec2-user@ip-172-30-2-248 pytorch]$ docker buildx build --progress=plain --platform="linux/amd64" --target official -t ghcr.io/pytorch/pytorch:2.5.0.dev20240617-cuda12.4-cudnn9-devel --build-arg BASE_IMAGE=nvidia/cuda:12.4.0-devel-ubuntu22.04 --build-arg PYTHON_VERSION=3.11 --build-arg CUDA_VERSION= --build-arg CUDA_CHANNEL=nvidia --build-arg PYTORCH_VERSION=2.5.0.dev20240617 --build-arg INSTALL_CHANNEL=pytorch --build-arg TRITON_VERSION= --build-arg CMAKE_VARS="" . #0 building with "default" instance using docker driver ``` Please note: it looks like we are installing from the pytorch channel rather than the nightly channel on this PR, hence CUDA 12.4 is failing since it's not in the pytorch channel yet: https://github.com/pytorch/pytorch/actions/runs/9555354734/job/26338476741?pr=128852 Pull Request resolved: https://github.com/pytorch/pytorch/pull/128852 Approved by: https://github.com/malfet
113 lines
4.1 KiB
Docker
113 lines
4.1 KiB
Docker
# syntax=docker/dockerfile:1
|
|
|
|
# NOTE: Building this image requires Docker version >= 23.0.
|
|
#
|
|
# For reference:
|
|
# - https://docs.docker.com/build/dockerfile/frontend/#stable-channel
|
|
|
|
# Global build arguments. An ARG declared before the first FROM is only
# visible to FROM lines; stages that need the value redeclare it themselves.
# BASE_IMAGE selects the image used by the dev-base and official stages.
ARG BASE_IMAGE=ubuntu:22.04
ARG PYTHON_VERSION=3.11
|
|
|
|
# Stage dev-base: shared foundation for the build-oriented stages.
# Installs the compiler toolchain, ccache, cmake, curl, git and the JPEG/PNG
# development libraries on top of BASE_IMAGE.
FROM ${BASE_IMAGE} AS dev-base
# update + install + list cleanup stay in one layer so the apt index never
# ends up stale or baked into the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        git \
        libjpeg-dev \
        libpng-dev && \
    rm -rf /var/lib/apt/lists/*
RUN /usr/sbin/update-ccache-symlinks
# Keep the compiler cache in /opt/ccache so the build stage can mount it.
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
# /opt/conda is populated by the "conda" stage; putting it first on PATH here
# makes its python/pip/conda the default in every derived stage.
ENV PATH=/opt/conda/bin:$PATH
|
|
|
|
# Stage conda: installs Miniconda into /opt/conda together with the requested
# Python version and the Python build requirements.
FROM dev-base AS conda
ARG PYTHON_VERSION=3.11
# Automatically set by buildx
ARG TARGETPLATFORM
# translating Docker's TARGETPLATFORM into miniconda arches
# (anything other than linux/arm64 falls back to x86_64)
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") MINICONDA_ARCH=aarch64 ;; \
        *)             MINICONDA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -v -o ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh"
# No WORKDIR is set in this stage, so the file lands in the image's default
# working directory, which is also where the pip step below reads it from.
COPY requirements.txt .
# Manually invoke bash on miniconda script per https://github.com/conda/conda/issues/10431
RUN chmod +x ~/miniconda.sh && \
    bash ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" cmake conda-build pyyaml numpy ipython && \
    /opt/conda/bin/python -mpip install -r requirements.txt && \
    /opt/conda/bin/conda clean -ya
|
|
|
|
# Stage submodule-update: copies the build context into /opt/pytorch and
# checks out all git submodules recursively so the build stage gets a
# complete source tree.
FROM dev-base AS submodule-update
WORKDIR /opt/pytorch
COPY . .
RUN git submodule update --init --recursive
|
|
|
|
# Stage build: compiles and installs PyTorch from source into the conda
# environment under /opt/conda.
FROM conda AS build
# Optional extra settings for the build, passed as --build-arg CMAKE_VARS=...
ARG CMAKE_VARS
WORKDIR /opt/pytorch
COPY --from=conda /opt/conda /opt/conda
COPY --from=submodule-update /opt/pytorch /opt/pytorch
RUN make triton
# The ccache directory is a BuildKit cache mount: it is reused across builds
# and is not stored in the image layer.
# NOTE(review): "export eval ${CMAKE_VARS}" exports a variable literally named
# "eval" plus whatever assignments CMAKE_VARS expands to; presumably only the
# latter is intended -- kept as-is to preserve behavior.
RUN --mount=type=cache,target=/opt/ccache \
    export eval ${CMAKE_VARS} && \
    TORCH_CUDA_ARCH_LIST="7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0 9.0a" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
    CMAKE_PREFIX_PATH="$(dirname "$(which conda)")/../" \
    python setup.py install
|
|
|
|
# Stage conda-installs: installs prebuilt PyTorch packages (conda packages, or
# pip wheels on arm64) into /opt/conda and verifies the result.
FROM conda AS conda-installs
ARG PYTHON_VERSION=3.11
ARG CUDA_VERSION=12.1
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch-nightly
RUN /opt/conda/bin/conda update -y -n base -c defaults conda
RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y "python=${PYTHON_VERSION}"

# Automatically set by buildx
ARG TARGETPLATFORM

# On arm64 we can only install wheel packages.
# Everywhere else the conda packages are used, with pytorch-cuda pinned to the
# major.minor part of CUDA_VERSION.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") pip install --extra-index-url https://download.pytorch.org/whl/cpu/ torch torchvision torchaudio ;; \
        *)             /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" pytorch torchvision torchaudio "pytorch-cuda=$(echo "${CUDA_VERSION}" | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya
RUN /opt/conda/bin/pip install torchelastic
# Sanity check: when a CUDA version was requested, fail the build unless the
# installed torch reports that it was compiled with CUDA. With an empty
# CUDA_VERSION the check only prints the result.
RUN IS_CUDA=$(python -c 'import torch ; print(torch.cuda._is_compiled())'); \
    echo "Is torch compiled with cuda: ${IS_CUDA}"; \
    if [ "${IS_CUDA}" != "True" ] && [ -n "${CUDA_VERSION}" ]; then \
        exit 1; \
    fi
|
|
|
|
# Stage official: the published runtime image. Starts again from BASE_IMAGE
# and takes only the finished /opt/conda tree from the conda-installs stage.
FROM ${BASE_IMAGE} AS official
ARG PYTORCH_VERSION
ARG TRITON_VERSION
ARG TARGETPLATFORM
ARG CUDA_VERSION
LABEL com.nvidia.volumes.needed="nvidia_driver"
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        libjpeg-dev \
        libpng-dev \
    && rm -rf /var/lib/apt/lists/*
COPY --from=conda-installs /opt/conda /opt/conda
# gcc is installed only when a Triton version is requested on a non-arm64
# target. The package lists were removed by the previous apt layer, so they
# must be refreshed here first; the steps are chained with && so a failed
# install fails the build instead of being masked by the cleanup command.
RUN if [ -n "${TRITON_VERSION}" ] && [ "${TARGETPLATFORM}" != "linux/arm64" ]; then \
        apt-get update && \
        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gcc && \
        rm -rf /var/lib/apt/lists/*; \
    fi
# The two PATH updates stay in separate instructions: each one extends the
# value produced by the previous instruction.
ENV PATH=/opt/conda/bin:$PATH
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
ENV PYTORCH_VERSION=${PYTORCH_VERSION}
WORKDIR /workspace
|
|
|
|
# Stage dev: the official image with its conda tree replaced by the one from
# the from-source build stage.
FROM official AS dev
# Should override the already installed version from the official-image stage
COPY --from=build /opt/conda /opt/conda
|