From f0c85571edcebf7ec4b2221f2e2a257421da0947 Mon Sep 17 00:00:00 2001
From: Eli Uriegas
Date: Fri, 24 Jan 2020 10:24:46 -0800
Subject: [PATCH] docker: Refactor Dockerfile process for official images (#32515)

Summary:
## Commit Message:

Refactors Dockerfile to be as parallel as possible with caching and adds a new Makefile to build said Dockerfile.

Also updated the README.md to reflect the changes as well as updated some of the verbiage around running our latest Docker images.

Adds the new Dockerfile process to our CircleCI workflows

## How to build:

Building the new images is pretty simple, just requires `docker` > 18.06 since the new build process relies on `buildkit` caching and multi-stage build resolving.

### Development images

For `runtime` images:
```
make -f docker.Makefile runtime-image
```

For `devel` images:
```
make -f docker.Makefile devel-image
```

Builds are tagged as follows:

```bash
docker.io/${docker_user:-whoami}/pytorch:$(git describe --tags)-${image_type}
```

Example:
```
docker.io/seemethere/pytorch:v1.4.0a0-2225-g9eba97b61d-runtime
```

### Official images

Official images are the ones hosted on [`docker.io/pytorch/pytorch`](https://hub.docker.com/r/pytorch/pytorch)

To do official image builds you can simply set the `BUILD_TYPE` variable to `official` and it will do the correct build without building the local binaries:

Example:
```
make -f docker.Makefile BUILD_TYPE=official runtime-image
```

## How to push:

Pushing is also super simple (And will automatically tag the right thing based off of the git tag):

```
make -f docker.Makefile runtime-push
make -f docker.Makefile devel-push
```

Signed-off-by: Eli Uriegas
Pull Request resolved: https://github.com/pytorch/pytorch/pull/32515

Differential Revision: D19558619

Pulled By: seemethere

fbshipit-source-id: a06b25cd39ae9890751a60f8f36739ad6ab9ac99
---
 Dockerfile      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 README.md       | 26 ++++++++++++------
 docker.Makefile | 55 
+++++++++++++++++++++++++++++++++++++
 3 files changed, 148 insertions(+), 8 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 docker.Makefile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 000000000000..0bf93c19b87b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,75 @@
+# syntax = docker/dockerfile:experimental
+#
+# NOTE: To build this you will need a docker version > 18.06 with
+#       experimental enabled and DOCKER_BUILDKIT=1
+#
+#       If you do not use buildkit you are not going to have a good time
+#
+#       For reference:
+#           https://docs.docker.com/develop/develop-images/build_enhancements/
+ARG BASE_IMAGE=ubuntu:18.04
+ARG PYTHON_VERSION=3.7
+
+FROM ${BASE_IMAGE} as dev-base
+RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
+    apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        ccache \
+        cmake \
+        curl \
+        git \
+        libjpeg-dev \
+        libpng-dev && \
+    rm -rf /var/lib/apt/lists/*
+RUN /usr/sbin/update-ccache-symlinks
+RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
+ENV PATH /opt/conda/bin:$PATH
+
+FROM dev-base as conda
+# An ARG declared before the first FROM is only visible to FROM lines;
+# redeclare it (without a value) so this stage inherits the global default.
+ARG PYTHON_VERSION
+RUN curl -v -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    chmod +x ~/miniconda.sh && \
+    ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    /opt/conda/bin/conda install -y python=${PYTHON_VERSION} conda-build pyyaml numpy ipython && \
+    /opt/conda/bin/conda clean -ya
+
+FROM dev-base as submodule-update
+WORKDIR /opt/pytorch
+COPY . .
+RUN git submodule update --init --recursive
+
+FROM conda as build
+WORKDIR /opt/pytorch
+# /opt/conda is inherited from the conda stage this stage is built FROM; no copy needed
+COPY --from=submodule-update /opt/pytorch /opt/pytorch
+RUN --mount=type=cache,target=/opt/ccache \
+    TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
+    CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
+    python setup.py install
+
+FROM conda as conda-installs
+ARG INSTALL_CHANNEL=pytorch-nightly
+RUN /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -y pytorch torchvision && \
+    /opt/conda/bin/conda clean -ya
+
+FROM ${BASE_IMAGE} as official
+LABEL com.nvidia.volumes.needed="nvidia_driver"
+RUN --mount=type=cache,id=apt-final,target=/var/cache/apt \
+    apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libjpeg-dev \
+        libpng-dev && \
+    rm -rf /var/lib/apt/lists/*
+COPY --from=conda-installs /opt/conda /opt/conda
+ENV PATH /opt/conda/bin:$PATH
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+WORKDIR /workspace
+
+FROM official as dev
+# Should override the already installed version from the official stage
+COPY --from=build /opt/conda /opt/conda
diff --git a/README.md b/README.md
index 3f09a7930194..17a18aad35f9 100644
--- a/README.md
+++ b/README.md
@@ -280,20 +280,30 @@ ccmake build # or cmake-gui build
 
 ### Docker Image
 
-Dockerfile is supplied to build images with cuda support and cudnn v7. You can pass `-e PYTHON_VERSION=x.y` flag to specify which Python version is to be used by Miniconda, or leave it unset to use the default. Build from pytorch repo directory as docker needs to copy git repo into docker filesystem while building the image.
-```
-docker build -t pytorch -f docker/pytorch/Dockerfile . 
# [optional] --build-arg WITH_TORCHVISION=0
+#### Using pre-built images
+
+You can also pull a pre-built docker image from Docker Hub and run with docker v19.03+
+
+```bash
+docker run --gpus all --rm -ti --ipc=host pytorch/pytorch:latest
 ```
 
-You can also pull a pre-built docker image from Docker Hub and run with nvidia-docker,
-but this is not currently maintained and will pull PyTorch 0.2.
-```
-nvidia-docker run --rm -ti --ipc=host pytorch/pytorch:latest
-```
 Please note that PyTorch uses shared memory to share data between processes, so if torch multiprocessing is used (e.g.
 for multithreaded data loaders) the default shared memory segment size that container runs with is not enough, and you
 should increase shared memory size either with `--ipc=host` or `--shm-size` command line options to `nvidia-docker run`.
 
+#### Building the image yourself
+
+**NOTE:** Must be built with a docker version > 18.06
+
+The `Dockerfile` is supplied to build images with cuda support and cudnn v7.
+You can pass the `PYTHON_VERSION=x.y` make variable to specify which Python version is to be used by Miniconda, or leave it
+unset to use the default.
+```bash
+make -f docker.Makefile
+# images are tagged as docker.io/${your_docker_username}/pytorch
+```
+
 ### Building the Documentation
 
 To build documentation in various formats, you will need [Sphinx](http://www.sphinx-doc.org) and the
diff --git a/docker.Makefile b/docker.Makefile
new file mode 100644
index 000000000000..ba53b94d7898
--- /dev/null
+++ b/docker.Makefile
@@ -0,0 +1,55 @@
+DOCKER_REGISTRY = docker.io
+DOCKER_ORG := $(shell docker info 2>/dev/null | sed '/Username:/!d;s/.* //')
+DOCKER_IMAGE = pytorch
+DOCKER_FULL_NAME = $(DOCKER_REGISTRY)/$(DOCKER_ORG)/$(DOCKER_IMAGE)
+
+ifeq ("$(DOCKER_ORG)","")
+$(warning WARNING: No docker user found using results from whoami)
+DOCKER_ORG := $(shell whoami)
+endif
+
+BASE_RUNTIME = ubuntu:18.04
+BASE_DEVEL = nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+
+# The conda channel to use to install pytorch / torchvision
+INSTALL_CHANNEL = pytorch
+
+PYTHON_VERSION = 3.7
+# Dockerfile stage to build (passed to --target): either official or dev
+BUILD_TYPE = dev
+BUILD_PROGRESS = auto
+BUILD_ARGS = --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg PYTHON_VERSION=$(PYTHON_VERSION) --build-arg INSTALL_CHANNEL=$(INSTALL_CHANNEL)
+DOCKER_BUILD = DOCKER_BUILDKIT=1 docker build --progress=$(BUILD_PROGRESS) --target $(BUILD_TYPE) -t $(DOCKER_FULL_NAME):$(DOCKER_TAG) $(BUILD_ARGS) .
+DOCKER_PUSH = docker push $(DOCKER_FULL_NAME):$(DOCKER_TAG)
+
+.PHONY: all
+all: devel-image
+
+.PHONY: devel-image
+devel-image: BASE_IMAGE := $(BASE_DEVEL)
+devel-image: DOCKER_TAG := $(shell git describe --tags)-devel
+devel-image:
+	$(DOCKER_BUILD)
+
+.PHONY: devel-push
+devel-push: BASE_IMAGE := $(BASE_DEVEL)
+devel-push: DOCKER_TAG := $(shell git describe --tags)-devel
+devel-push:
+	$(DOCKER_PUSH)
+
+.PHONY: runtime-image
+runtime-image: BASE_IMAGE := $(BASE_RUNTIME)
+runtime-image: DOCKER_TAG := $(shell git describe --tags)-runtime
+runtime-image:
+	$(DOCKER_BUILD)
+	docker tag $(DOCKER_FULL_NAME):$(DOCKER_TAG) $(DOCKER_FULL_NAME):latest
+
+.PHONY: runtime-push
+runtime-push: BASE_IMAGE := $(BASE_RUNTIME)
+runtime-push: DOCKER_TAG := $(shell git describe --tags)-runtime
+runtime-push:
+	$(DOCKER_PUSH)
+
+.PHONY: clean
+clean:
+	-docker rmi -f $(shell docker images -q $(DOCKER_FULL_NAME))