Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-31 04:04:57 +08:00)

Compare commits
81 commits in this comparison:

af3964a872, 1645546aa9, 350fad8a22, 565d183042, 2ebda372f6, 28b846c486, 9622eaa6fa, db8154df32,
b6eeea343d, 1fe9991554, 00118024f3, 87edf5a349, 20972878cc, 0d1128d25c, 81dc60493d, b18df1cedf,
3976d77509, 09c83673bf, 5b9a8f918e, f20fb2c1a1, 4e00120117, 2b3f35daea, c580437342, 455e788fe6,
c980fb359b, bae45bb106, 34557d80f4, 1e77879b2a, ff52d424b2, 4b7aa13b30, e1f2d0916e, 4b5b7e53f6,
db66fa9436, 392c89ab6a, cddf501fc5, d0907d2c34, 448a85a8e0, ea3138fd09, b89c96fe58, 088f47bb89,
ddb3804f87, a896311d06, 937b634b5d, 004dfdc7cc, f8aa5e2ed7, 8a49309f81, 14de24d89c, c7cccc250e,
1f694e9a6e, 1108bced80, c36d452224, 11955b86d2, 9a6788202b, d58bad4073, f95e252984, b49f0f8154,
269c25267b, fde471ee2a, eb24d2ff6e, f768068c3b, c456451915, f282d1dc7c, 2a3cae0f3e, 3d9630abc2,
da7a5147db, 5df8e582cd, 5dff261598, aa0c8920af, a3b658bf3b, 94e89f3911, f0956ad9ec, 452ea78f43,
3d5d66868e, cf373e25e2, 91d764c781, 524235bb71, e035fa028b, 58a928c3b9, 4f1eefa8ad, 4251c151e3,
c0931a3a4d
.circleci/config.yml (deleted)

@@ -1,974 +0,0 @@
```yaml
docker_config_defaults: &docker_config_defaults
  user: jenkins
  aws_auth:
    # This IAM user only allows read-only access to ECR
    aws_access_key_id: AKIAJ2J6FIG5OSZTQ3IA
    aws_secret_access_key: ${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_ONLY}

# NOTE: We only perform the merge in the build step and not in the test step,
# because all source files will be shared from build to test
merge_pull_request_onto_master: &merge_pull_request_onto_master
  name: Merge Onto Master
  no_output_timeout: "10h"
  command: |
    if [[ "${CIRCLE_BRANCH}" != "master" ]]; then
      git config --global user.email "circleci.ossci@gmail.com"
      git config --global user.name "CircleCI"

      git config remote.origin.url https://github.com/pytorch/pytorch.git
      git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
      git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=50 --quiet

      export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
      echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
      export GIT_COMMIT=${CIRCLE_SHA1}
      echo "GIT_COMMIT: " ${GIT_COMMIT}

      git checkout -f ${GIT_COMMIT}
      git reset --hard ${GIT_COMMIT}
      git merge --no-edit --no-ff ${GIT_MERGE_TARGET}
    fi

pytorch_linux_cpu_build_test_defaults: &pytorch_linux_cpu_build_test_defaults
  resource_class: large
  working_directory: /var/lib/jenkins/workspace
  steps:
  - checkout
  - run:
      <<: *merge_pull_request_onto_master
  - run:
      name: Build
      no_output_timeout: "10h"
      command: |
        export IN_CIRCLECI=1
        export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
        export SCCACHE_MAX_JOBS=`expr $(nproc) - 1`
        export MEMORY_LIMIT_MAX_JOBS=8  # the "large" resource class on CircleCI has 32 CPU cores; using all of them would OOM
        export MAX_JOBS=$(( ${SCCACHE_MAX_JOBS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${SCCACHE_MAX_JOBS} ))
        # This IAM user allows write access to the S3 bucket for sccache
        export AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA
        export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}
        git submodule sync && git submodule update --init
        .jenkins/pytorch/build.sh
        .jenkins/pytorch/test.sh

pytorch_linux_build_defaults: &pytorch_linux_build_defaults
  resource_class: large
  working_directory: /var/lib/jenkins/workspace
  steps:
  - checkout
  - run:
      <<: *merge_pull_request_onto_master
  - run:
      name: Build
      no_output_timeout: "10h"
      command: |
        export IN_CIRCLECI=1
        export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
        if [ -n "${CUDA_VERSION}" ]; then
          export TORCH_CUDA_ARCH_LIST=5.2
        fi
        export SCCACHE_MAX_JOBS=`expr $(nproc) - 1`
        export MEMORY_LIMIT_MAX_JOBS=8  # the "large" resource class on CircleCI has 32 CPU cores; using all of them would OOM
        export MAX_JOBS=$(( ${SCCACHE_MAX_JOBS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${SCCACHE_MAX_JOBS} ))
        # This IAM user allows write access to the S3 bucket for sccache
        export AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA
        export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}
        git submodule sync && git submodule update --init
        .jenkins/pytorch/build.sh
        export PYTORCH_CI_ENV_DIR=/var/lib/jenkins/pytorch-ci-env
        mkdir -p ${PYTORCH_CI_ENV_DIR}
        cp -r /var/lib/jenkins/workspace ${PYTORCH_CI_ENV_DIR}/build_workspace  # copies all source files from the build step to the next step
        cp -r /opt/conda/lib/python${PYTHON_VERSION}/site-packages/torch ${PYTORCH_CI_ENV_DIR}/torch
        cp -r build/bin ${PYTORCH_CI_ENV_DIR}/cpp_test_bin
        if [ -d "../cpp-build" ]; then
          cp -r ../cpp-build ${PYTORCH_CI_ENV_DIR}/cpp-build
        fi
  - persist_to_workspace:
      root: /var/lib/jenkins/pytorch-ci-env
      paths:
        - "*"

pytorch_linux_test_defaults: &pytorch_linux_test_defaults
  machine:
    image: default
  steps:
  - run:
      name: Prepare workspace
      command: |
        sudo mkdir -p /opt/workspace
        sudo chmod -R 777 /opt/workspace
  - attach_workspace:
      at: /opt/workspace
  - run:
      name: Test
      no_output_timeout: "10h"
      command: |
        set -x
        sudo pip install awscli
        if [ -n "${CUDA_VERSION}" ]; then
          curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
          echo "deb https://nvidia.github.io/libnvidia-container/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
          echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
          echo "deb https://nvidia.github.io/nvidia-docker/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
        fi
        sudo apt-get update
        sudo apt-get remove linux-image-generic linux-headers-generic linux-generic
        sudo apt-get install linux-headers-$(uname -r)
        sudo apt-get install linux-image-generic
        if [ -n "${CUDA_VERSION}" ]; then
          wget 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-396.26.run'
          sudo /bin/bash ./NVIDIA-Linux-x86_64-396.26.run -s --no-drm
          sudo apt-get install -y nvidia-docker2
        fi
        sudo pkill -SIGHUP dockerd
        if [ -n "${CUDA_VERSION}" ]; then
          nvidia-smi
        fi
        # This IAM user only allows read-only access to ECR
        export AWS_ACCESS_KEY_ID=AKIAJ2J6FIG5OSZTQ3IA
        export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_ONLY}
        eval $(aws ecr get-login --region us-east-1 --no-include-email)
        docker pull ${DOCKER_IMAGE}
        if [ -n "${CUDA_VERSION}" ]; then
          id=$(docker run --runtime=nvidia -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
        else
          id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
        fi
        pwd

        cp -r /opt/workspace/build_workspace/. /home/circleci/project  # copies all source files from the build step to the current step

        echo "declare -x IN_CIRCLECI=1" > /home/circleci/project/env
        echo "declare -x PYTHON_VERSION=${PYTHON_VERSION}" >> /home/circleci/project/env
        echo "declare -x SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> /home/circleci/project/env
        # This IAM user allows write access to the S3 bucket for sccache
        echo "declare -x AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA" >> /home/circleci/project/env
        echo "declare -x AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}" >> /home/circleci/project/env

        mkdir -p /home/circleci/project/build
        cp -r /opt/workspace/cpp_test_bin /home/circleci/project/build/bin
        docker cp /home/circleci/project/. "$id:/var/lib/jenkins/workspace"
        echo "mkdir -p /opt/conda/lib/python${PYTHON_VERSION}/site-packages" | docker exec -u jenkins -i "$id" bash
        docker cp "/opt/workspace/torch" "$id:/opt/conda/lib/python${PYTHON_VERSION}/site-packages/torch"
        if [ -d "/opt/workspace/cpp-build" ]; then
          docker cp "/opt/workspace/cpp-build" "$id:/var/lib/jenkins/cpp-build"
        fi
        if [ -n "${MULTI_GPU}" ]; then
          (echo "source ./workspace/env" && echo 'sudo chown -R jenkins workspace /opt/conda/lib/python${PYTHON_VERSION}/site-packages/torch && cd workspace && .jenkins/pytorch/multigpu-test.sh') | docker exec -u jenkins -i "$id" bash
        else
          (echo "source ./workspace/env" && echo 'sudo chown -R jenkins workspace /opt/conda/lib/python${PYTHON_VERSION}/site-packages/torch && cd workspace && .jenkins/pytorch/test.sh') | docker exec -u jenkins -i "$id" bash
        fi

caffe2_linux_build_defaults: &caffe2_linux_build_defaults
  resource_class: large
  working_directory: /var/lib/jenkins/workspace
  steps:
  - checkout
  - run:
      <<: *merge_pull_request_onto_master
  - run:
      name: Build
      no_output_timeout: "10h"
      command: |
        export IN_CIRCLECI=1
        export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
        # This IAM user allows write access to the S3 bucket for sccache
        export AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA
        export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}
        export SCCACHE_MAX_JOBS=`expr $(nproc) - 1`
        export MEMORY_LIMIT_MAX_JOBS=8  # the "large" resource class on CircleCI has 32 CPU cores; using all of them would OOM
        export MAX_JOBS=$(( ${SCCACHE_MAX_JOBS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${SCCACHE_MAX_JOBS} ))

        set -ex

        # Need to fetch PR refs so that onnxbot-tracked PRs can be checked out
        git submodule update --init third_party/onnx || true
        cd third_party/onnx && git fetch --tags --progress origin +refs/pull/*:refs/remotes/origin/pr/* && cd -

        # Reinitialize submodules
        git submodule sync && git submodule update --init --recursive

        # Ensure jenkins can write to the ccache root dir.
        sudo chown jenkins:jenkins "${HOME}/.ccache"

        # Make ccache log to the workspace, so we can archive it after the build
        mkdir -p build
        ccache -o log_file=$PWD/build/ccache.log

        # Configure additional cmake arguments
        cmake_args=()
        cmake_args+=("$CMAKE_ARGS")

        if [[ $BUILD_ENVIRONMENT == *aten* ]]; then
          cmake_args+=("-DBUILD_ATEN=ON")
        fi

        # conda must be added to the path for Anaconda builds (this location must be
        # the same as that in install_anaconda.sh used to build the docker image)
        if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
          export PATH=/opt/conda/bin:$PATH
          sudo chown -R jenkins:jenkins '/opt/conda'
        fi

        # Build
        if test -x ".jenkins/caffe2/build.sh"; then
          ./.jenkins/caffe2/build.sh ${cmake_args[@]}
        else
          ./.jenkins/build.sh ${cmake_args[@]}
        fi

        # Show sccache stats if it is running
        if pgrep sccache > /dev/null; then
          sccache --show-stats
        fi

        # Copy all necessary binaries to the shared workspace
        export CAFFE2_CI_ENV_DIR=/var/lib/jenkins/caffe2-ci-env
        mkdir -p ${CAFFE2_CI_ENV_DIR}
        cp -r /var/lib/jenkins/workspace ${CAFFE2_CI_ENV_DIR}/build_workspace  # copies all source files from the build step to the next step
        cp -r third_party/onnx ${CAFFE2_CI_ENV_DIR}/onnx
        if [ -d "/usr/local/caffe2" ]; then
          cp -r /usr/local/caffe2 ${CAFFE2_CI_ENV_DIR}/caffe2
        fi
        if [ -d "/opt/conda" ]; then
          cp -r /opt/conda ${CAFFE2_CI_ENV_DIR}/conda_env
        fi
  - persist_to_workspace:
      root: /var/lib/jenkins/caffe2-ci-env
      paths:
        - "*"

caffe2_linux_test_defaults: &caffe2_linux_test_defaults
  machine:
    image: default
  steps:
  - run:
      name: Prepare workspace
      command: |
        sudo mkdir -p /opt/workspace
        sudo chmod -R 777 /opt/workspace
  - attach_workspace:
      at: /opt/workspace
  - run:
      name: Test
      no_output_timeout: "10h"
      command: |
        set -x
        sudo pip install awscli
        if [ -n "${CUDA_VERSION}" ]; then
          curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
          echo "deb https://nvidia.github.io/libnvidia-container/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
          echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
          echo "deb https://nvidia.github.io/nvidia-docker/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
        fi
        sudo apt-get update
        sudo apt-get remove linux-image-generic linux-headers-generic linux-generic
        sudo apt-get install linux-headers-$(uname -r)
        sudo apt-get install linux-image-generic
        if [ -n "${CUDA_VERSION}" ]; then
          wget 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-396.26.run'
          sudo /bin/bash ./NVIDIA-Linux-x86_64-396.26.run -s --no-drm
          sudo apt-get install -y nvidia-docker2
        fi
        sudo pkill -SIGHUP dockerd
        if [ -n "${CUDA_VERSION}" ]; then
          nvidia-smi
        fi
        # This IAM user only allows read-only access to ECR
        export AWS_ACCESS_KEY_ID=AKIAJ2J6FIG5OSZTQ3IA
        export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_ONLY}
        eval $(aws ecr get-login --region us-east-1 --no-include-email)
        docker pull ${DOCKER_IMAGE}
        if [ -n "${CUDA_VERSION}" ]; then
          id=$(docker run --runtime=nvidia -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
        else
          id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
        fi
        pwd
        cp -r /opt/workspace/build_workspace/. /home/circleci/project  # copies all source files from the build step to the current step
        echo "declare -x IN_CIRCLECI=1" > /home/circleci/project/env
        echo "declare -x SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> /home/circleci/project/env
        # This IAM user allows write access to the S3 bucket for sccache
        echo "declare -x AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA" >> /home/circleci/project/env
        echo "declare -x AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}" >> /home/circleci/project/env
        echo "declare -x BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" >> /home/circleci/project/env

        # TODO: merge this into the Caffe2 build.sh
        cat >/home/circleci/project/ci_build_script.sh <<EOL
        # =================== The following code will be executed inside the Docker container ===================
        set -ex

        # libdc1394 (a dependency of OpenCV) expects /dev/raw1394 to exist...
        sudo ln /dev/null /dev/raw1394

        # Hotfix: use hypothesis 3.44.6 on Ubuntu 14.04
        # See comments on https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830
        if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
          sudo pip uninstall -y hypothesis
          # "pip install hypothesis==3.44.6" from the official server is unreliable on CircleCI, so we host a copy on S3 instead
          sudo pip install attrs -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
          sudo pip install coverage -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
          sudo pip install hypothesis -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
        fi

        # conda must be added to the path for Anaconda builds (this location must be
        # the same as that in install_anaconda.sh used to build the docker image)
        if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
          export PATH=/opt/conda/bin:$PATH
        fi

        pip install --user -b /tmp/pip_install_onnx "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
        pip install --user future

        # Test
        if test -x ".jenkins/caffe2/test.sh"; then
          ./.jenkins/caffe2/test.sh
        else
          ./.jenkins/test.sh
        fi

        # Remove benign core dumps.
        # These are tests for signal handling (including SIGABRT).
        rm -f ./crash/core.fatal_signal_as.*
        rm -f ./crash/core.logging_test.*
        # =================== The above code will be executed inside the Docker container ===================
        EOL
        chmod +x /home/circleci/project/ci_build_script.sh
        docker cp /home/circleci/project/. "$id:/var/lib/jenkins/workspace"
        if [ -d "/opt/workspace/caffe2" ]; then
          echo "mkdir -p /usr/local/caffe2" | docker exec -u jenkins -i "$id" bash
          docker cp /opt/workspace/caffe2/. "$id:/usr/local/caffe2"
        fi
        if [ -d "/opt/workspace/conda_env" ]; then
          echo "sudo mkdir -p /opt/conda" | docker exec -u jenkins -i "$id" bash
          docker cp /opt/workspace/conda_env/. "$id:/opt/conda"
        fi
        docker cp /opt/workspace/onnx/. "$id:/var/lib/jenkins/workspace/third_party/onnx"
        (echo "source ./workspace/env" && echo 'sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh') | docker exec -u jenkins -i "$id" bash

caffe2_macos_build_defaults: &caffe2_macos_build_defaults
  macos:
    xcode: "9.0"
  steps:
    - checkout
    - run:
        <<: *merge_pull_request_onto_master
    - run:
        name: Build
        no_output_timeout: "10h"
        command: |
          set -ex

          export IN_CIRCLECI=1

          brew install cmake

          # Reinitialize submodules
          git submodule sync && git submodule update --init --recursive

          # Reinitialize PATH (see the path_helper(8) man page)
          eval `/usr/libexec/path_helper -s`

          # Use Homebrew Python if configured to do so
          if [ "${PYTHON_INSTALLATION}" == "homebrew" ]; then
            export PATH=/usr/local/opt/python/libexec/bin:/usr/local/bin:$PATH
          fi

          pip install numpy

          # Install Anaconda if we need to
          if [ -n "${CAFFE2_USE_ANACONDA}" ]; then
            rm -rf ${TMPDIR}/anaconda
            curl -o ${TMPDIR}/anaconda.sh "https://repo.continuum.io/archive/Anaconda${ANACONDA_VERSION}-5.0.1-MacOSX-x86_64.sh"
            /bin/bash ${TMPDIR}/anaconda.sh -b -p ${TMPDIR}/anaconda
            rm -f ${TMPDIR}/anaconda.sh
            export PATH="${TMPDIR}/anaconda/bin:${PATH}"
            source ${TMPDIR}/anaconda/bin/activate
          fi

          # Install sccache
          sudo curl https://s3.amazonaws.com/ossci-macos/sccache --output /usr/local/bin/sccache
          sudo chmod +x /usr/local/bin/sccache
          export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2

          # This IAM user allows write access to the S3 bucket for sccache
          export AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA
          export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}

          export SCCACHE_BIN=${PWD}/sccache_bin
          mkdir -p ${SCCACHE_BIN}
          if which sccache > /dev/null; then
            printf "#!/bin/sh\nexec sccache $(which clang++) \$*" > "${SCCACHE_BIN}/clang++"
            chmod a+x "${SCCACHE_BIN}/clang++"

            printf "#!/bin/sh\nexec sccache $(which clang) \$*" > "${SCCACHE_BIN}/clang"
            chmod a+x "${SCCACHE_BIN}/clang"

            export PATH="${SCCACHE_BIN}:$PATH"
          fi

          # Build
          if [ "${BUILD_IOS:-0}" -eq 1 ]; then
            scripts/build_ios.sh
          elif [ -n "${CAFFE2_USE_ANACONDA}" ]; then
            # All conda build logic should be in scripts/build_anaconda.sh
            scripts/build_anaconda.sh
          else
            scripts/build_local.sh
          fi

          # Show sccache stats if it is running
          if which sccache > /dev/null; then
            sccache --show-stats
          fi

version: 2
jobs:
  pytorch_linux_trusty_py2_7_9_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-py2.7.9:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_trusty_py2_7_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-py2.7:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_trusty_py3_5_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-py3.5:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_trusty_py3_6_gcc4_8_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-py3.6-gcc4.8:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_trusty_py3_6_gcc5_4_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-py3.6-gcc5.4:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_trusty_py3_6_gcc7_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-py3.6-gcc7:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_trusty_pynightly_build_test:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-trusty-pynightly:238
        <<: *docker_config_defaults
    <<: *pytorch_linux_cpu_build_test_defaults

  pytorch_linux_xenial_py3_clang5_asan_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan:238
        <<: *docker_config_defaults
    environment:
      PYTHON_VERSION: "3.6"
    <<: *pytorch_linux_build_defaults

  pytorch_linux_xenial_py3_clang5_asan_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan:238"
      PYTHON_VERSION: "3.6"
    resource_class: large
    <<: *pytorch_linux_test_defaults

  pytorch_linux_xenial_cuda8_cudnn6_py3_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda8-cudnn6-py3:238
        <<: *docker_config_defaults
    environment:
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "8"
    <<: *pytorch_linux_build_defaults

  pytorch_linux_xenial_cuda8_cudnn6_py3_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda8-cudnn6-py3:238"
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "8"
    resource_class: gpu.medium
    <<: *pytorch_linux_test_defaults

  pytorch_linux_xenial_cuda8_cudnn6_py3_multigpu_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda8-cudnn6-py3:238"
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "8"
      MULTI_GPU: "1"
    resource_class: gpu.large
    <<: *pytorch_linux_test_defaults

  pytorch_linux_xenial_cuda9_cudnn7_py2_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9-cudnn7-py2:238
        <<: *docker_config_defaults
    environment:
      PYTHON_VERSION: "2.7"
      CUDA_VERSION: "9"
    <<: *pytorch_linux_build_defaults

  pytorch_linux_xenial_cuda9_cudnn7_py2_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9-cudnn7-py2:238"
      PYTHON_VERSION: "2.7"
      CUDA_VERSION: "9"
    resource_class: gpu.medium
    <<: *pytorch_linux_test_defaults

  pytorch_linux_xenial_cuda9_cudnn7_py3_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9-cudnn7-py3:238
        <<: *docker_config_defaults
    environment:
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "9"
    <<: *pytorch_linux_build_defaults

  pytorch_linux_xenial_cuda9_cudnn7_py3_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9-cudnn7-py3:238"
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "9"
    resource_class: gpu.medium
    <<: *pytorch_linux_test_defaults

  pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7:238
        <<: *docker_config_defaults
    environment:
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "9.2"
    <<: *pytorch_linux_build_defaults

  pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7:238"
      PYTHON_VERSION: "3.6"
      CUDA_VERSION: "9.2"
    resource_class: gpu.medium
    <<: *pytorch_linux_test_defaults

  pytorch_macos_10_13_py3_build:
    macos:
      xcode: "9.0"
    steps:
      - checkout
      - run:
          <<: *merge_pull_request_onto_master
      - run:
          name: Build
          environment:
            BUILD_ENVIRONMENT: pytorch-macos-10.13-py3
          no_output_timeout: "10h"
          command: |
            set -ex

            export IN_CIRCLECI=1

            # Install sccache
            sudo curl https://s3.amazonaws.com/ossci-macos/sccache --output /usr/local/bin/sccache
            sudo chmod +x /usr/local/bin/sccache

            export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
            # This IAM user allows write access to the S3 bucket for sccache
            export AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA
            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}

            git submodule sync && git submodule update --init
            chmod a+x .jenkins/pytorch/macos-build.sh
            .jenkins/pytorch/macos-build.sh

            # TODO: need to share source files from build to test once macOS builds are enabled

      - persist_to_workspace:
          root: /Users/distiller/pytorch-ci-env
          paths:
            - "*"

  pytorch_macos_10_13_py3_test:
    macos:
      xcode: "9.0"
    steps:
      - run:
          name: Prepare workspace
          command: |
            sudo mkdir -p /Users/distiller/pytorch-ci-env
            sudo chmod -R 777 /Users/distiller/pytorch-ci-env
      - attach_workspace:
          at: /Users/distiller/pytorch-ci-env
      - run:
          name: Test
          environment:
            BUILD_ENVIRONMENT: pytorch-macos-10.13-py3
          no_output_timeout: "10h"
          command: |
            # TODO: need to share source files from build to test once macOS builds are enabled
            set -ex
            export IN_CIRCLECI=1
            chmod a+x .jenkins/pytorch/macos-test.sh
            .jenkins/pytorch/macos-test.sh

  pytorch_macos_10_13_cuda9_2_cudnn7_py3_build:
    macos:
      xcode: "9.0"
    steps:
      - checkout
      - run:
          <<: *merge_pull_request_onto_master
      - run:
          name: Build
          environment:
            JOB_BASE_NAME: pytorch-macos-10.13-cuda9.2-cudnn7-py3-build
            BUILD_ENVIRONMENT: pytorch-macos-10.13-cuda9.2-cudnn7-py3
          no_output_timeout: "10h"
          command: |
            set -ex

            export IN_CIRCLECI=1

            # Install CUDA 9.2
            sudo rm -rf ~/cuda_9.2.64_mac_installer.app || true
            curl https://s3.amazonaws.com/ossci-macos/cuda_9.2.64_mac_installer.zip -o ~/cuda_9.2.64_mac_installer.zip
            unzip ~/cuda_9.2.64_mac_installer.zip -d ~/
            sudo ~/cuda_9.2.64_mac_installer.app/Contents/MacOS/CUDAMacOSXInstaller --accept-eula --no-window
            sudo cp /usr/local/cuda/lib/libcuda.dylib /Developer/NVIDIA/CUDA-9.2/lib/libcuda.dylib
            sudo rm -rf /usr/local/cuda || true

            # Install cuDNN 7.1 for CUDA 9.2
            curl https://s3.amazonaws.com/ossci-macos/cudnn-9.2-osx-x64-v7.1.tgz -o ~/cudnn-9.2-osx-x64-v7.1.tgz
            rm -rf ~/cudnn-9.2-osx-x64-v7.1 && mkdir ~/cudnn-9.2-osx-x64-v7.1
            tar -xzvf ~/cudnn-9.2-osx-x64-v7.1.tgz -C ~/cudnn-9.2-osx-x64-v7.1
            sudo cp ~/cudnn-9.2-osx-x64-v7.1/cuda/include/cudnn.h /Developer/NVIDIA/CUDA-9.2/include/
            sudo cp ~/cudnn-9.2-osx-x64-v7.1/cuda/lib/libcudnn* /Developer/NVIDIA/CUDA-9.2/lib/
            sudo chmod a+r /Developer/NVIDIA/CUDA-9.2/include/cudnn.h /Developer/NVIDIA/CUDA-9.2/lib/libcudnn*

            # Install sccache
            sudo curl https://s3.amazonaws.com/ossci-macos/sccache --output /usr/local/bin/sccache
            sudo chmod +x /usr/local/bin/sccache
            export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
            # This IAM user allows write access to the S3 bucket for sccache
            export AWS_ACCESS_KEY_ID=AKIAJJZUW4G2ASX5W7KA
            export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET}

            git submodule sync && git submodule update --init
            chmod a+x .jenkins/pytorch/macos-build.sh
            .jenkins/pytorch/macos-build.sh

  caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda8.0-cudnn6-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      CUDA_VERSION: "8"
      BUILD_ENVIRONMENT: "py2-cuda8.0-cudnn6-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda8.0-cudnn6-ubuntu16.04:190"
      CUDA_VERSION: "8"
      BUILD_ENVIRONMENT: "py2-cuda8.0-cudnn6-ubuntu16.04"
    resource_class: gpu.medium
    <<: *caffe2_linux_test_defaults

  caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      CUDA_VERSION: "9"
      BUILD_ENVIRONMENT: "py2-cuda9.0-cudnn7-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:190"
      CUDA_VERSION: "9"
      BUILD_ENVIRONMENT: "py2-cuda9.0-cudnn7-ubuntu16.04"
    resource_class: gpu.medium
    <<: *caffe2_linux_test_defaults

  caffe2_py2_cuda9_0_cudnn7_aten_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      CUDA_VERSION: "9"
      BUILD_ENVIRONMENT: "py2-cuda9.0-cudnn7-aten-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_cuda9_0_cudnn7_aten_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-ubuntu16.04:190"
      CUDA_VERSION: "9"
      BUILD_ENVIRONMENT: "py2-cuda9.0-cudnn7-aten-ubuntu16.04"
    resource_class: gpu.medium
    <<: *caffe2_linux_test_defaults

  caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      CUDA_VERSION: "9.1"
      BUILD_ENVIRONMENT: "py2-cuda9.1-cudnn7-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.1-cudnn7-ubuntu16.04:190"
      CUDA_VERSION: "9.1"
      BUILD_ENVIRONMENT: "py2-cuda9.1-cudnn7-ubuntu16.04"
    resource_class: gpu.medium
    <<: *caffe2_linux_test_defaults

  caffe2_py2_mkl_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-mkl-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_mkl_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-mkl-ubuntu16.04:190"
      BUILD_ENVIRONMENT: "py2-mkl-ubuntu16.04"
    resource_class: large
    <<: *caffe2_linux_test_defaults

  caffe2_py2_gcc4_8_ubuntu14_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-gcc4.8-ubuntu14.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_gcc4_8_ubuntu14_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.8-ubuntu14.04:190"
      BUILD_ENVIRONMENT: "py2-gcc4.8-ubuntu14.04"
    resource_class: large
    <<: *caffe2_linux_test_defaults

  caffe2_onnx_py2_gcc5_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "onnx-py2-gcc5-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_onnx_py2_gcc5_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc5-ubuntu16.04:190"
      BUILD_ENVIRONMENT: "onnx-py2-gcc5-ubuntu16.04"
    resource_class: large
    <<: *caffe2_linux_test_defaults

  caffe2_conda2_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/conda2-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "conda2-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_conda2_ubuntu16_04_test:
    environment:
      DOCKER_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/conda2-ubuntu16.04:190"
      BUILD_ENVIRONMENT: "conda2-ubuntu16.04"
    resource_class: large
    <<: *caffe2_linux_test_defaults

  caffe2_py2_cuda8_0_cudnn7_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda8.0-cudnn7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-cuda8.0-cudnn7-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_gcc4_9_ubuntu14_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc4.9-ubuntu14.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-gcc4.9-ubuntu14.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_clang3_8_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.8-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-clang3.8-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_clang3_9_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-clang3.9-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-clang3.9-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_gcc6_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc6-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-gcc6-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_gcc7_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-gcc7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-gcc7-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_cuda8_0_cudnn7_aten_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda8.0-cudnn7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-cuda8.0-cudnn7-aten-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_android_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-android-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-android-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_conda3_cuda9_0_cudnn7_ubuntu16_04_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/conda3-cuda9.0-cudnn7-ubuntu16.04:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "conda3-cuda9.0-cudnn7-ubuntu16.04"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_cuda9_0_cudnn7_centos7_build:
    docker:
      - image: 308535385114.dkr.ecr.us-east-1.amazonaws.com/caffe2/py2-cuda9.0-cudnn7-centos7:190
        <<: *docker_config_defaults
    environment:
      BUILD_ENVIRONMENT: "py2-cuda9.0-cudnn7-centos7"
    <<: *caffe2_linux_build_defaults

  caffe2_py2_ios_macos10_13_build:
    environment:
      BUILD_IOS: "1"
      PYTHON_INSTALLATION: "system"
      PYTHON_VERSION: "2"
    <<: *caffe2_macos_build_defaults

  caffe2_py2_system_macos10_13_build:
    environment:
      PYTHON_INSTALLATION: "system"
      PYTHON_VERSION: "2"
    <<: *caffe2_macos_build_defaults

workflows:
  version: 2
  build:
    jobs:
      # - pytorch_linux_trusty_py2_7_9_build_test
      # - pytorch_linux_trusty_py2_7_build_test
      # - pytorch_linux_trusty_py3_5_build_test
      # - pytorch_linux_trusty_py3_6_gcc4_8_build_test
      # - pytorch_linux_trusty_py3_6_gcc5_4_build_test
      # - pytorch_linux_trusty_py3_6_gcc7_build_test
      # - pytorch_linux_trusty_pynightly_build_test
      # - pytorch_linux_xenial_py3_clang5_asan_build
      # - pytorch_linux_xenial_py3_clang5_asan_test:
      #     requires:
      #       - pytorch_linux_xenial_py3_clang5_asan_build
      # - pytorch_linux_xenial_cuda8_cudnn6_py3_build
      # - pytorch_linux_xenial_cuda8_cudnn6_py3_test:
      #     requires:
      #       - pytorch_linux_xenial_cuda8_cudnn6_py3_build
      # - pytorch_linux_xenial_cuda8_cudnn6_py3_multigpu_test:
      #     requires:
      #       - pytorch_linux_xenial_cuda8_cudnn6_py3_build
      # - pytorch_linux_xenial_cuda9_cudnn7_py2_build
      # - pytorch_linux_xenial_cuda9_cudnn7_py2_test:
      #     requires:
      #       - pytorch_linux_xenial_cuda9_cudnn7_py2_build
      # - pytorch_linux_xenial_cuda9_cudnn7_py3_build
      # - pytorch_linux_xenial_cuda9_cudnn7_py3_test:
      #     requires:
      #       - pytorch_linux_xenial_cuda9_cudnn7_py3_build
      # - pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_build
      # - pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test:
      #     requires:
      #       - pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_build

      # - pytorch_macos_10_13_py3_build
      # - pytorch_macos_10_13_py3_test:
      #     requires:
      #       - pytorch_macos_10_13_py3_build
      # - pytorch_macos_10_13_cuda9_2_cudnn7_py3_build

      # - caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_build
      # - caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_test:
      #     requires:
      #       - caffe2_py2_cuda8_0_cudnn6_ubuntu16_04_build
      # - caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build
      # - caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_test:
      #     requires:
      #       - caffe2_py2_cuda9_0_cudnn7_ubuntu16_04_build
      # - caffe2_py2_cuda9_0_cudnn7_aten_ubuntu16_04_build
      # - caffe2_py2_cuda9_0_cudnn7_aten_ubuntu16_04_test:
      #     requires:
      #       - caffe2_py2_cuda9_0_cudnn7_aten_ubuntu16_04_build
      # - caffe2_py2_mkl_ubuntu16_04_build
      # - caffe2_py2_mkl_ubuntu16_04_test:
      #     requires:
      #       - caffe2_py2_mkl_ubuntu16_04_build
      # - caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build
      # - caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_test:
      #     requires:
      #       - caffe2_py2_cuda9_1_cudnn7_ubuntu16_04_build
      # - caffe2_py2_gcc4_8_ubuntu14_04_build
      # - caffe2_py2_gcc4_8_ubuntu14_04_test:
      #     requires:
      #       - caffe2_py2_gcc4_8_ubuntu14_04_build
      # - caffe2_onnx_py2_gcc5_ubuntu16_04_build
      # - caffe2_onnx_py2_gcc5_ubuntu16_04_test:
      #     requires:
      #       - caffe2_onnx_py2_gcc5_ubuntu16_04_build
      # - caffe2_conda2_ubuntu16_04_build
      # - caffe2_conda2_ubuntu16_04_test:
      #     requires:
      #       - caffe2_conda2_ubuntu16_04_build
      # - caffe2_py2_cuda8_0_cudnn7_ubuntu16_04_build
      # - caffe2_py2_gcc4_9_ubuntu14_04_build
      # - caffe2_py2_clang3_8_ubuntu16_04_build
      # - caffe2_py2_clang3_9_ubuntu16_04_build
      # - caffe2_py2_gcc6_ubuntu16_04_build
      # - caffe2_py2_gcc7_ubuntu16_04_build
      # - caffe2_py2_cuda8_0_cudnn7_aten_ubuntu16_04_build
      # - caffe2_py2_android_ubuntu16_04_build
      # - caffe2_conda3_cuda9_0_cudnn7_ubuntu16_04_build
      # - caffe2_py2_cuda9_0_cudnn7_centos7_build

      # - caffe2_py2_ios_macos10_13_build
      # - caffe2_py2_system_macos10_13_build
```
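The build steps in this config all cap parallelism with the same shell arithmetic clamp: take one less than the core count (to leave headroom for sccache) but never more than the memory-safe limit. A minimal standalone sketch of that logic, with the values taken from the config above and an illustrative echo added:

```bash
#!/bin/bash
# Clamp MAX_JOBS to min(nproc - 1, 8): leave one core free for sccache,
# and never exceed the memory-safe job count on the 32-core "large" class.
SCCACHE_MAX_JOBS=$(( $(nproc) - 1 ))
MEMORY_LIMIT_MAX_JOBS=8
MAX_JOBS=$(( SCCACHE_MAX_JOBS > MEMORY_LIMIT_MAX_JOBS ? MEMORY_LIMIT_MAX_JOBS : SCCACHE_MAX_JOBS ))
echo "Building with ${MAX_JOBS} parallel jobs"
```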
.clang-format (deleted)

@@ -1,88 +0,0 @@
```yaml
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
  - Regex: '^<.*\.h(pp)?>'
    Priority: 1
  - Regex: '^<.*'
    Priority: 2
  - Regex: '.*'
    Priority: 3
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 2000000
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
```
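With these settings saved as `.clang-format` at the repository root, clang-format picks them up automatically when invoked with `-style=file`. A brief usage sketch; the source path is only illustrative:

```bash
# Rewrite a file in place using the nearest .clang-format found in parent dirs.
clang-format -style=file -i torch/csrc/autograd/engine.cpp

# Or print the reformatted result to stdout without touching the file.
clang-format -style=file torch/csrc/autograd/engine.cpp
```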
							
								
								
									
.clang-tidy (deleted)
@@ -1,51 +0,0 @@
```yaml
---
# NOTE: there must be no spaces before the '-', so put the comma first.
Checks: '
  *
  ,clang-analyzer-*
  ,modernize-*
  ,-cert-dcl21-cpp
  ,-cert-err58-cpp
  ,-cert-err60-cpp
  ,-clang-diagnostic-*
  ,-cppcoreguidelines-owning-memory
  ,-cppcoreguidelines-pro-bounds-array-to-pointer-decay
  ,-cppcoreguidelines-pro-bounds-constant-array-index
  ,-cppcoreguidelines-pro-type-member-init
  ,-cppcoreguidelines-pro-type-static-cast-downcast
  ,-cppcoreguidelines-pro-type-union-access
  ,-cppcoreguidelines-pro-type-vararg
  ,-cppcoreguidelines-special-member-functions
  ,-fuchsia-*
  ,-google-build-using-namespace
  ,-google-default-arguments
  ,-google-explicit-constructor
  ,-google-readability-braces-around-statements
  ,-google-readability-namespace-comments
  ,-google-readability-todo
  ,-google-runtime-references
  ,-hicpp-braces-around-statements
  ,-hicpp-explicit-conversions
  ,-hicpp-member-init
  ,-hicpp-no-array-decay
  ,-hicpp-signed-bitwise
  ,-hicpp-special-member-functions
  ,-hicpp-vararg
  ,-llvm-header-guard
  ,-llvm-include-order
  ,-llvm-namespace-comment
  ,-misc-unused-parameters
  ,-modernize-make-unique
  ,-modernize-use-default-member-init
  ,-performance-unnecessary-value-param
  ,-readability-braces-around-statements
  ,-readability-else-after-return
  ,-readability-implicit-bool-conversion
  ,-readability-named-parameter
  '
WarningsAsErrors: ''
HeaderFilterRegex: 'torch/csrc/'
AnalyzeTemporaryDtors: false
CheckOptions:
...
```
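clang-tidy also discovers the nearest `.clang-tidy` automatically, and `HeaderFilterRegex: 'torch/csrc/'` restricts header diagnostics to that tree. A hedged invocation sketch, assuming a build directory `build/` configured with `-DCMAKE_EXPORT_COMPILE_COMMANDS=ON`; the source path is illustrative:

```bash
# List the checks the config enables for a given file.
clang-tidy -p build -list-checks torch/csrc/autograd/engine.cpp

# Run the enabled checks against that file, using build/compile_commands.json.
clang-tidy -p build torch/csrc/autograd/engine.cpp
```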
							
								
								
									
.gitattributes (deleted)
							| @ -1 +0,0 @@ | ||||
| *.bat	text eol=crlf | ||||
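A quick way to confirm the rule above is being picked up is `git check-attr`; the path here is only an example:

```
# Ask git which eol attribute applies to a batch file; expected
# output given the rule above: "scripts/foo.bat: eol: crlf"
git check-attr eol -- scripts/foo.bat
```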
							
								
								
									
.github/CONTRIBUTING.md (0 changed lines, vendored)
.github/ISSUE_TEMPLATE.md (38 changed lines, vendored)
| @ -1,38 +0,0 @@ | ||||
| If you have a question or would like help and support, please ask at our | ||||
| [forums](https://discuss.pytorch.org/). | ||||
|  | ||||
| If you are submitting a feature request, please preface the title with [feature request]. | ||||
| If you are submitting a bug report, please fill in the following details. | ||||
|  | ||||
| ## Issue description | ||||
|  | ||||
| Provide a short description. | ||||
|  | ||||
| ## Code example | ||||
|  | ||||
| Please try to provide a minimal example to reproduce the bug. | ||||
| Error messages and stack traces are also helpful. | ||||
|  | ||||
| ## System Info | ||||
| Please copy and paste the output from our | ||||
| [environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py) | ||||
| (or fill out the checklist below manually). | ||||
|  | ||||
| You can get the script and run it with: | ||||
| ``` | ||||
| wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py | ||||
| # For security purposes, please check the contents of collect_env.py before running it. | ||||
| python collect_env.py | ||||
| ``` | ||||
|  | ||||
| - PyTorch or Caffe2: | ||||
| - How you installed PyTorch (conda, pip, source): | ||||
| - Build command you used (if compiling from source): | ||||
| - OS: | ||||
| - PyTorch version: | ||||
| - Python version: | ||||
| - CUDA/cuDNN version: | ||||
| - GPU models and configuration: | ||||
| - GCC version (if compiling from source): | ||||
| - CMake version: | ||||
| - Versions of any other relevant libraries: | ||||
.github/PULL_REQUEST_TEMPLATE.md (0 changed lines, vendored)
.gitignore (222 changed lines, vendored)
| @ -1,130 +1,43 @@ | ||||
| # READ THIS BEFORE YOU REFACTOR ME | ||||
| # | ||||
| # setup.py uses the list of patterns in this file to decide | ||||
| # what to delete, but it's not 100% sound.  So, for example, | ||||
| # if you delete aten/build/ because it's redundant with build/, | ||||
| # aten/build/ will stop being cleaned.  So be careful when | ||||
| # refactoring this file!  (A dry-run preview sketch follows this file.) | ||||
|  | ||||
| ## PyTorch | ||||
|  | ||||
| .mypy_cache | ||||
| */*.pyc | ||||
| */*.so* | ||||
| build/ | ||||
| dist/ | ||||
| torch.egg-info/ | ||||
| */**/__pycache__ | ||||
| */**/*.dylib* | ||||
| torch/version.py | ||||
| torch/csrc/generic/TensorMethods.cpp | ||||
| torch/lib/*.so* | ||||
| torch/lib/*.a* | ||||
| torch/lib/*.dylib* | ||||
| torch/lib/*.h | ||||
| torch/lib/build | ||||
| torch/lib/tmp_install | ||||
| torch/lib/include | ||||
| torch/lib/torch_shm_manager | ||||
| torch/csrc/jit/generated/* | ||||
| torch/csrc/autograd/generated/* | ||||
| torch/csrc/cudnn/cuDNN.cpp | ||||
| torch/csrc/nn/THNN.cwrap | ||||
| torch/csrc/nn/THNN.cpp | ||||
| torch/csrc/nn/THCUNN.cwrap | ||||
| torch/csrc/nn/THCUNN.cpp | ||||
| torch/csrc/nn/THNN_generic.cwrap | ||||
| torch/csrc/nn/THNN_generic.cpp | ||||
| torch/csrc/nn/THNN_generic.h | ||||
| torch/csrc/generated | ||||
| docs/src/**/* | ||||
| test/data/legacy_modules.t7 | ||||
| test/data/gpu_tensors.pt | ||||
| test/htmlcov | ||||
| test/.coverage | ||||
| */*.pyc | ||||
| */**/*.pyc | ||||
| */**/*.pyd | ||||
| */**/*.so* | ||||
| */**/**/*.pyc | ||||
| */**/**/**/*.pyc | ||||
| */**/**/**/**/*.pyc | ||||
| aten/build/ | ||||
| aten/src/ATen/Config.h | ||||
| aten/src/ATen/cuda/CUDAConfig.h | ||||
| build/ | ||||
| dist/ | ||||
| docs/src/**/* | ||||
| docs/cpp/xml/ | ||||
| docs/cpp/html/ | ||||
| docs/cpp/api/ | ||||
| test/.coverage | ||||
| test/cpp/api/mnist | ||||
| test/custom_operator/model.pt | ||||
| test/data/gpu_tensors.pt | ||||
| test/data/legacy_modules.t7 | ||||
| */*.so* | ||||
| */**/*.so* | ||||
| */**/*.dylib* | ||||
| test/data/legacy_serialized.pt | ||||
| test/data/linear.pt | ||||
| test/htmlcov | ||||
| test/cpp_extensions/install/ | ||||
| third_party/build/ | ||||
| tools/shared/_utils_internal.py | ||||
| torch.egg-info/ | ||||
| torch/csrc/autograd/generated/* | ||||
| torch/csrc/cudnn/cuDNN.cpp | ||||
| torch/csrc/generated | ||||
| torch/csrc/generic/TensorMethods.cpp | ||||
| torch/csrc/jit/generated/* | ||||
| torch/csrc/jit/fusers/Config.h | ||||
| torch/csrc/nn/THCUNN.cpp | ||||
| torch/csrc/nn/THCUNN.cwrap | ||||
| torch/csrc/nn/THNN_generic.cpp | ||||
| torch/csrc/nn/THNN_generic.cwrap | ||||
| torch/csrc/nn/THNN_generic.h | ||||
| torch/csrc/nn/THNN.cpp | ||||
| torch/csrc/nn/THNN.cwrap | ||||
| torch/lib/*.a* | ||||
| torch/lib/*.dll* | ||||
| torch/lib/*.exe* | ||||
| torch/lib/*.dylib* | ||||
| torch/lib/*.h | ||||
| torch/lib/*.lib | ||||
| torch/lib/*.so* | ||||
| torch/lib/build | ||||
| torch/lib/cmake | ||||
| torch/lib/include | ||||
| torch/lib/pkgconfig | ||||
| torch/lib/protoc | ||||
| torch/lib/tmp_install | ||||
| torch/lib/torch_shm_manager | ||||
| torch/lib/python* | ||||
| torch/share/ | ||||
| torch/version.py | ||||
|  | ||||
| # IPython notebook checkpoints | ||||
| .ipynb_checkpoints | ||||
|  | ||||
| # Editor temporaries | ||||
| *.swn | ||||
| *.swo | ||||
| *.swp | ||||
| *.swm | ||||
| *~ | ||||
|  | ||||
| # macOS dir files | ||||
| .DS_Store | ||||
|  | ||||
| # Symbolic files | ||||
| tools/shared/cwrap_common.py | ||||
|  | ||||
| # Ninja files | ||||
| .ninja_deps | ||||
| .ninja_log | ||||
| compile_commands.json | ||||
| *.egg-info/ | ||||
| docs/source/scripts/activation_images/ | ||||
|  | ||||
| ## General | ||||
|  | ||||
| # Compiled Object files | ||||
| *.slo | ||||
| *.lo | ||||
| *.o | ||||
| *.cuo | ||||
| *.obj | ||||
|  | ||||
| # Compiled Dynamic libraries | ||||
| *.so | ||||
| *.dylib | ||||
| *.dll | ||||
|  | ||||
| # Compiled Static libraries | ||||
| *.lai | ||||
| *.la | ||||
| *.a | ||||
| *.lib | ||||
|  | ||||
| # Compiled protocol buffers | ||||
| *.pb.h | ||||
| *.pb.cc | ||||
| *_pb2.py | ||||
|  | ||||
| # Compiled python | ||||
| *.pyc | ||||
| *.pyd | ||||
|  | ||||
| # Compiled MATLAB | ||||
| *.mex* | ||||
|  | ||||
| # IPython notebook checkpoints | ||||
| .ipynb_checkpoints | ||||
| @ -135,75 +48,6 @@ docs/source/scripts/activation_images/ | ||||
| *.swp | ||||
| *~ | ||||
|  | ||||
| # Sublime Text settings | ||||
| *.sublime-workspace | ||||
| *.sublime-project | ||||
|  | ||||
| # Eclipse Project settings | ||||
| *.*project | ||||
| .settings | ||||
|  | ||||
| # QtCreator files | ||||
| *.user | ||||
|  | ||||
| # PyCharm files | ||||
| .idea | ||||
|  | ||||
| # OSX dir files | ||||
| .DS_Store | ||||
|  | ||||
| ## Caffe2 | ||||
|  | ||||
| # build, distribute, and bins (+ python proto bindings) | ||||
| build | ||||
| build_host_protoc | ||||
| build_android | ||||
| build_ios | ||||
| /build_* | ||||
| .build_debug/* | ||||
| .build_release/* | ||||
| distribute/* | ||||
| *.testbin | ||||
| *.bin | ||||
| cmake_build | ||||
| .cmake_build | ||||
| gen | ||||
| .setuptools-cmake-build | ||||
| .pytest_cache | ||||
| aten/build/* | ||||
|  | ||||
| # Bram | ||||
| plsdontbreak | ||||
|  | ||||
| # Generated documentation | ||||
| docs/_site | ||||
| docs/gathered | ||||
| _site | ||||
| doxygen | ||||
| docs/dev | ||||
|  | ||||
| # LevelDB files | ||||
| *.sst | ||||
| *.ldb | ||||
| LOCK | ||||
| LOG* | ||||
| CURRENT | ||||
| MANIFEST-* | ||||
|  | ||||
| # generated version file | ||||
| caffe2/version.py | ||||
|  | ||||
| # setup.py intermediates | ||||
| .eggs | ||||
| caffe2.egg-info | ||||
|  | ||||
| # Atom/Watchman required file | ||||
| .watchmanconfig | ||||
|  | ||||
| # BEGIN NOT-CLEAN-FILES (setup.py handles this marker. Do not change.) | ||||
| # | ||||
| # Below files are not deleted by "setup.py clean". | ||||
|  | ||||
| # Visual Studio Code files | ||||
| .vscode | ||||
| .vs | ||||
|  | ||||
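As a companion to the warning at the top of this file, git can preview which ignored files a pattern-based clean would remove, without touching setup.py at all; a minimal, read-only sketch:

```
# Dry run: -n prints instead of deleting, -X limits the walk to
# files ignored by .gitignore, -d includes ignored directories.
git clean -Xdn
```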
							
								
								
									
.gitmodules (85 changed lines, vendored)
| @ -1,78 +1,9 @@ | ||||
| [submodule "third_party/catch"] | ||||
| 	path = third_party/catch | ||||
| 	url = https://github.com/catchorg/Catch2.git | ||||
| [submodule "third_party/pybind11"] | ||||
| 	path = third_party/pybind11 | ||||
| 	url = https://github.com/pybind/pybind11.git | ||||
| [submodule "third_party/cub"] | ||||
| 	path = third_party/cub | ||||
| 	url = https://github.com/NVlabs/cub.git | ||||
| [submodule "third_party/eigen"] | ||||
| 	path = third_party/eigen | ||||
| 	url = https://github.com/eigenteam/eigen-git-mirror.git | ||||
| [submodule "third_party/googletest"] | ||||
| 	path = third_party/googletest | ||||
| 	url = https://github.com/google/googletest.git | ||||
| [submodule "third_party/nervanagpu"] | ||||
| 	path = third_party/nervanagpu | ||||
| 	url = https://github.com/NervanaSystems/nervanagpu.git | ||||
| [submodule "third_party/benchmark"] | ||||
| 	path = third_party/benchmark | ||||
| 	url = https://github.com/google/benchmark.git | ||||
| [submodule "third_party/protobuf"] | ||||
| 	path = third_party/protobuf | ||||
| 	url = https://github.com/google/protobuf.git | ||||
| [submodule "third_party/ios-cmake"] | ||||
| 	path = third_party/ios-cmake | ||||
| 	url = https://github.com/Yangqing/ios-cmake.git | ||||
| [submodule "third_party/NNPACK"] | ||||
| 	path = third_party/NNPACK | ||||
| 	url = https://github.com/Maratyszcza/NNPACK.git | ||||
| [submodule "third_party/gloo"] | ||||
| 	path = third_party/gloo | ||||
| [submodule "torch/lib/gloo"] | ||||
| 	path = torch/lib/gloo | ||||
| 	url = https://github.com/facebookincubator/gloo | ||||
| [submodule "third_party/NNPACK_deps/pthreadpool"] | ||||
| 	path = third_party/pthreadpool | ||||
| 	url = https://github.com/Maratyszcza/pthreadpool.git | ||||
| [submodule "third_party/NNPACK_deps/FXdiv"] | ||||
| 	path = third_party/FXdiv | ||||
| 	url = https://github.com/Maratyszcza/FXdiv.git | ||||
| [submodule "third_party/NNPACK_deps/FP16"] | ||||
| 	path = third_party/FP16 | ||||
| 	url = https://github.com/Maratyszcza/FP16.git | ||||
| [submodule "third_party/NNPACK_deps/psimd"] | ||||
| 	path = third_party/psimd | ||||
| 	url = https://github.com/Maratyszcza/psimd.git | ||||
| [submodule "third_party/zstd"] | ||||
| 	path = third_party/zstd | ||||
| 	url = https://github.com/facebook/zstd.git | ||||
| [submodule "third-party/cpuinfo"] | ||||
| 	path = third_party/cpuinfo | ||||
| 	url = https://github.com/Maratyszcza/cpuinfo.git | ||||
| [submodule "third_party/python-enum"] | ||||
| 	path = third_party/python-enum | ||||
| 	url = https://github.com/PeachPy/enum34.git | ||||
| [submodule "third_party/python-peachpy"] | ||||
| 	path = third_party/python-peachpy | ||||
| 	url = https://github.com/Maratyszcza/PeachPy.git | ||||
| [submodule "third_party/python-six"] | ||||
| 	path = third_party/python-six | ||||
| 	url = https://github.com/benjaminp/six.git | ||||
| [submodule "third_party/ComputeLibrary"] | ||||
| 	path = third_party/ComputeLibrary | ||||
| 	url = https://github.com/ARM-software/ComputeLibrary.git | ||||
| [submodule "third_party/onnx"] | ||||
| 	path = third_party/onnx | ||||
| 	url = https://github.com/onnx/onnx.git | ||||
| [submodule "third_party/cereal"] | ||||
| 	path = third_party/cereal | ||||
| 	url = https://github.com/USCiLab/cereal | ||||
| [submodule "third_party/onnx-tensorrt"] | ||||
| 	path = third_party/onnx-tensorrt | ||||
| 	url = https://github.com/onnx/onnx-tensorrt | ||||
| [submodule "third_party/sleef"] | ||||
| 	path = third_party/sleef | ||||
| 	url = https://github.com/shibatch/sleef | ||||
| [submodule "third_party/ideep"] | ||||
| 	path = third_party/ideep | ||||
| 	url = https://github.com/intel/ideep | ||||
| [submodule "torch/lib/pybind11"] | ||||
| 	path = torch/lib/pybind11 | ||||
| 	url = https://github.com/pybind/pybind11 | ||||
| [submodule "torch/lib/nanopb"] | ||||
| 	path = torch/lib/nanopb | ||||
| 	url = https://github.com/nanopb/nanopb.git | ||||
|  | ||||
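These submodules are not fetched by a plain clone; the CI scripts later on this page initialize them explicitly with:

```
# Fetch every submodule listed above; --recursive also initializes
# submodules nested inside them.
git submodule update --init --recursive
```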
| @ -1,14 +0,0 @@ | ||||
| # Jenkins | ||||
|  | ||||
| The scripts in this directory are the entrypoint for testing Caffe2. | ||||
|  | ||||
| The environment variable `BUILD_ENVIRONMENT` is expected to be set to | ||||
| the build environment you intend to test. It is a hint for the build | ||||
| and test scripts to configure Caffe2 a certain way and include/exclude | ||||
| tests. For Docker images, it equals the name of the image itself. For | ||||
| example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are | ||||
| built on Jenkins and are used in triggered builds already have this | ||||
| environment variable set in their manifest. Also see | ||||
| `./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`. | ||||
|  | ||||
| Our Jenkins installation is located at https://ci.pytorch.org/jenkins/. | ||||
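A minimal sketch of driving these scripts by hand, assuming the image name from the example above and that you are in the directory containing `build.sh`/`test.sh`:

```
# The Jenkins Docker images already bake this variable into their
# manifest; set it yourself only when running outside those images.
export BUILD_ENVIRONMENT=py2-cuda9.0-cudnn7-ubuntu16.04
./build.sh
./test.sh
```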
| @ -1,282 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| pip install --user --no-cache-dir hypothesis==3.59.0 | ||||
|  | ||||
|  | ||||
| # The INSTALL_PREFIX here must match up with test.sh | ||||
| INSTALL_PREFIX="/usr/local/caffe2" | ||||
| LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) | ||||
| ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd) | ||||
| CMAKE_ARGS=() | ||||
| SCCACHE="$(which sccache)" | ||||
|  | ||||
| if [ "$(which gcc)" != "/root/sccache/gcc" ]; then | ||||
|   # Setup SCCACHE | ||||
|   ############################################################################### | ||||
|   # Setup sccache if SCCACHE_BUCKET is set | ||||
|   if [ -n "${SCCACHE_BUCKET}" ]; then | ||||
|     mkdir -p ./sccache | ||||
|  | ||||
|     SCCACHE="$(which sccache)" | ||||
|     if [ -z "${SCCACHE}" ]; then | ||||
|       echo "Unable to find sccache..." | ||||
|       exit 1 | ||||
|     fi | ||||
|  | ||||
|     # Setup wrapper scripts | ||||
|     for compiler in cc c++ gcc g++ x86_64-linux-gnu-gcc; do | ||||
|       ( | ||||
|         echo "#!/bin/sh" | ||||
|         echo "exec $SCCACHE $(which $compiler) \"\$@\"" | ||||
|       ) > "./sccache/$compiler" | ||||
|       chmod +x "./sccache/$compiler" | ||||
|     done | ||||
|  | ||||
|     if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then | ||||
|       ( | ||||
|         echo "#!/bin/sh" | ||||
|         echo "exec $SCCACHE $(which nvcc) \"\$@\"" | ||||
|       ) > "./sccache/nvcc" | ||||
|       chmod +x "./sccache/nvcc" | ||||
|     fi | ||||
|  | ||||
|     export CACHE_WRAPPER_DIR="$PWD/sccache" | ||||
|  | ||||
|     # CMake must find these wrapper scripts | ||||
|     export PATH="$CACHE_WRAPPER_DIR:$PATH" | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| # Setup ccache if configured to use it (and not sccache) | ||||
| if [ -z "${SCCACHE}" ] && which ccache > /dev/null; then | ||||
|   mkdir -p ./ccache | ||||
|   ln -sf "$(which ccache)" ./ccache/cc | ||||
|   ln -sf "$(which ccache)" ./ccache/c++ | ||||
|   ln -sf "$(which ccache)" ./ccache/gcc | ||||
|   ln -sf "$(which ccache)" ./ccache/g++ | ||||
|   ln -sf "$(which ccache)" ./ccache/x86_64-linux-gnu-gcc | ||||
|   if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]]; then | ||||
|     ln -sf "$(which ccache)" ./ccache/nvcc | ||||
|   fi | ||||
|   export CACHE_WRAPPER_DIR="$PWD/ccache" | ||||
|   export PATH="$CACHE_WRAPPER_DIR:$PATH" | ||||
| fi | ||||
|  | ||||
| # sccache will fail for CUDA builds if all cores are used for compiling | ||||
| if [ -z "$MAX_JOBS" ]; then | ||||
|   if [[ "${BUILD_ENVIRONMENT}" == *-cuda* ]] && [ -n "${SCCACHE}" ]; then | ||||
|     MAX_JOBS=`expr $(nproc) - 1` | ||||
|   else | ||||
|     MAX_JOBS=$(nproc) | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| report_compile_cache_stats() { | ||||
|   if [[ -n "${SCCACHE}" ]]; then | ||||
|     "$SCCACHE" --show-stats | ||||
|   elif which ccache > /dev/null; then | ||||
|     ccache -s | ||||
|   fi | ||||
| } | ||||
|  | ||||
| ############################################################################### | ||||
| # Explicitly set Python executable. | ||||
| ############################################################################### | ||||
| # On Ubuntu 16.04 the default Python is still 2.7. | ||||
| PYTHON="$(which python)" | ||||
| if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then | ||||
|   PYTHON=$(which "python${BASH_REMATCH[1]}") | ||||
|   CMAKE_ARGS+=("-DPYTHON_EXECUTABLE=${PYTHON}") | ||||
| fi | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # Use special scripts for Android, conda, and setup builds | ||||
| ############################################################################### | ||||
| if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then | ||||
|   export ANDROID_NDK=/opt/ndk | ||||
|   CMAKE_ARGS+=("-DBUILD_BINARY=ON") | ||||
|   CMAKE_ARGS+=("-DBUILD_TEST=ON") | ||||
|   CMAKE_ARGS+=("-DUSE_OBSERVERS=ON") | ||||
|   CMAKE_ARGS+=("-DUSE_ZSTD=ON") | ||||
|   "${ROOT_DIR}/scripts/build_android.sh" ${CMAKE_ARGS[*]} "$@" | ||||
|   exit 0 | ||||
| elif [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then | ||||
|   "${ROOT_DIR}/scripts/build_anaconda.sh" --skip-tests --install-locally "$@" | ||||
|   report_compile_cache_stats | ||||
|  | ||||
|   # This build will be tested against the onnx tests, which need onnx installed. | ||||
|   # At this point the visible protobuf installation will be in conda, since one | ||||
|   # of Caffe2's dependencies uses conda, so the correct protobuf include | ||||
|   # headers are those in conda as well. | ||||
|   # This path comes from install_anaconda.sh which installs Anaconda into the | ||||
|   # docker image | ||||
|   PROTOBUF_INCDIR=/opt/conda/include pip install -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx" | ||||
|   report_compile_cache_stats | ||||
|   exit 0 | ||||
| fi | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # Set cmake args | ||||
| ############################################################################### | ||||
| CMAKE_ARGS+=("-DBUILD_BINARY=ON") | ||||
| CMAKE_ARGS+=("-DBUILD_TEST=ON") | ||||
| CMAKE_ARGS+=("-DINSTALL_TEST=ON") | ||||
| CMAKE_ARGS+=("-DUSE_OBSERVERS=ON") | ||||
| CMAKE_ARGS+=("-DUSE_ZSTD=ON") | ||||
| CMAKE_ARGS+=("-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}") | ||||
|  | ||||
| if [[ $BUILD_ENVIRONMENT == *mkl* ]]; then | ||||
|   CMAKE_ARGS+=("-DBLAS=MKL") | ||||
| fi | ||||
| if [[ $BUILD_ENVIRONMENT == *cuda* ]]; then | ||||
|   CMAKE_ARGS+=("-DUSE_CUDA=ON") | ||||
|   CMAKE_ARGS+=("-DCUDA_ARCH_NAME=Maxwell") | ||||
|   CMAKE_ARGS+=("-DUSE_NNPACK=OFF") | ||||
|  | ||||
|   # Explicitly set path to NVCC such that the symlink to ccache or sccache is used | ||||
|   CMAKE_ARGS+=("-DCUDA_NVCC_EXECUTABLE=${CACHE_WRAPPER_DIR}/nvcc") | ||||
|  | ||||
|   # Ensure FindCUDA.cmake can infer the right path to the CUDA toolkit. | ||||
|   # Setting PATH to resolve to the right nvcc alone isn't enough. | ||||
|   # See /usr/share/cmake-3.5/Modules/FindCUDA.cmake, block at line 589. | ||||
|   export CUDA_PATH="/usr/local/cuda" | ||||
|  | ||||
|   # Ensure the ccache symlink can still find the real nvcc binary. | ||||
|   export PATH="/usr/local/cuda/bin:$PATH" | ||||
| fi | ||||
| if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then | ||||
|   # TODO: This is patching the official FindHIP to properly handle | ||||
|   # cmake generator expressions. A PR has been opened in the upstream repo: | ||||
|   # https://github.com/ROCm-Developer-Tools/HIP/pull/516 | ||||
|   # Remove this hack once it's merged. | ||||
|   if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then | ||||
|     sudo sed -i 's/\ -I${dir}/\ $<$<BOOL:${dir}>:-I${dir}>/' /opt/rocm/hip/cmake/FindHIP.cmake | ||||
|   fi | ||||
|  | ||||
|   export LANG=C.UTF-8 | ||||
|   export LC_ALL=C.UTF-8 | ||||
|   export HCC_AMDGPU_TARGET=gfx900 | ||||
|  | ||||
|   # Linking libcaffe2_hip.so takes 40 minutes; according to | ||||
|   # https://github.com/RadeonOpenCompute/hcc#thinlto-phase-1---implemented | ||||
|   # using ThinLTO could significantly improve link-time performance. | ||||
|   export KMTHINLTO=1 | ||||
|  | ||||
|   ########## HIPIFY Caffe2 operators | ||||
|   ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_pytorch_amd.py" | ||||
|   ${PYTHON} "${ROOT_DIR}/tools/amd_build/build_caffe2_amd.py" | ||||
| fi | ||||
|  | ||||
| # Try to include Redis support for Linux builds | ||||
| if [ "$(uname)" == "Linux" ]; then | ||||
|   CMAKE_ARGS+=("-DUSE_REDIS=ON") | ||||
| fi | ||||
|  | ||||
| # Currently, on Jenkins macOS, we use a custom protobuf. The macOS | ||||
| # continuous build at the moment has minimal dependencies - it doesn't use glog | ||||
| # or gflags either. | ||||
| if [ "$(uname)" == "Darwin" ]; then | ||||
|   CMAKE_ARGS+=("-DBUILD_CUSTOM_PROTOBUF=ON") | ||||
| fi | ||||
|  | ||||
| # Use a specialized onnx namespace in CI to catch hardcoded onnx namespaces | ||||
| CMAKE_ARGS+=("-DONNX_NAMESPACE=ONNX_NAMESPACE_FOR_C2_CI") | ||||
|  | ||||
| # We test for the presence of cmake3 (for platforms like CentOS and Ubuntu 14.04) | ||||
| # and use it if present. | ||||
| if [[ -x "$(command -v cmake3)" ]]; then | ||||
|     CMAKE_BINARY=cmake3 | ||||
| else | ||||
|     CMAKE_BINARY=cmake | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Configure and make | ||||
| ############################################################################### | ||||
|  | ||||
| if [[ -z "$INTEGRATED" ]]; then | ||||
|  | ||||
|   # Run cmake from the ./build_caffe2 directory so it doesn't conflict with | ||||
|   # the standard PyTorch build directory. Eventually these won't need to | ||||
|   # be separate. | ||||
|   rm -rf build_caffe2 | ||||
|   mkdir build_caffe2 | ||||
|   cd ./build_caffe2 | ||||
|  | ||||
|   # Configure | ||||
|   ${CMAKE_BINARY} "${ROOT_DIR}" ${CMAKE_ARGS[*]} "$@" | ||||
|  | ||||
|   # Build | ||||
|   if [ "$(uname)" == "Linux" ]; then | ||||
|     make "-j${MAX_JOBS}" install | ||||
|   else | ||||
|     echo "Don't know how to build on $(uname)" | ||||
|     exit 1 | ||||
|   fi | ||||
|  | ||||
| else | ||||
|  | ||||
|   # sccache will get stuck if all cores are used for compiling | ||||
|   # see https://github.com/pytorch/pytorch/pull/7361 | ||||
|   if [[ -n "${SCCACHE}" ]]; then | ||||
|     export MAX_JOBS=`expr $(nproc) - 1` | ||||
|   fi | ||||
|  | ||||
|   USE_LEVELDB=1 USE_LMDB=1 USE_OPENCV=1 BUILD_BINARY=1 python setup.py install --user | ||||
|  | ||||
|   # Save the test binaries so the test jobs can use them | ||||
|   cp -r torch/lib/tmp_install $INSTALL_PREFIX | ||||
|  | ||||
|   ls $INSTALL_PREFIX | ||||
|  | ||||
|   report_compile_cache_stats | ||||
| fi | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # Install ONNX | ||||
| ############################################################################### | ||||
|  | ||||
| # Install ONNX into a local directory | ||||
| pip install --user -b /tmp/pip_install_onnx "file://${ROOT_DIR}/third_party/onnx#egg=onnx" | ||||
|  | ||||
| report_compile_cache_stats | ||||
|  | ||||
| # Symlink the caffe2 base python path into the system python path, | ||||
| # so that we can import caffe2 without having to change $PYTHONPATH. | ||||
| # Run in a subshell to contain environment set by /etc/os-release. | ||||
| # | ||||
| # This is only done when running on Jenkins!  We don't want to pollute | ||||
| # the user environment with Python symlinks and ld.so.conf.d hacks. | ||||
| # | ||||
| if [[ -z "$INTEGRATED" ]]; then | ||||
|   if [ -n "${JENKINS_URL}" ]; then | ||||
|     ( | ||||
|       source /etc/os-release | ||||
|  | ||||
|       function python_version() { | ||||
|         "$PYTHON" -c 'import sys; print("python%d.%d" % sys.version_info[0:2])' | ||||
|       } | ||||
|  | ||||
|       # Debian/Ubuntu | ||||
|       if [[ "$ID_LIKE" == *debian* ]]; then | ||||
|         python_path="/usr/local/lib/$(python_version)/dist-packages" | ||||
|         sudo ln -sf "${INSTALL_PREFIX}/caffe2" "${python_path}" | ||||
|       fi | ||||
|  | ||||
|       # RHEL/CentOS | ||||
|       if [[ "$ID_LIKE" == *rhel* ]]; then | ||||
|         python_path="/usr/lib64/$(python_version)/site-packages/" | ||||
|         sudo ln -sf "${INSTALL_PREFIX}/caffe2" "${python_path}" | ||||
|       fi | ||||
|  | ||||
|       # /etc/ld.so.conf.d is used on both Debian and RHEL | ||||
|       echo "${INSTALL_PREFIX}/lib" | sudo tee /etc/ld.so.conf.d/caffe2.conf | ||||
|       sudo ldconfig | ||||
|     ) | ||||
|   fi | ||||
| fi | ||||
| @ -1,7 +0,0 @@ | ||||
| #!/bin/bash | ||||
| set -ex | ||||
| upstream="$1" | ||||
| pr="$2" | ||||
| git diff --name-only "$upstream" "$pr" | ||||
| # For safety, unconditionally trigger for any changes. | ||||
| #git diff --name-only "$upstream" "$pr" | grep -Eq '^(CMakeLists.txt|Makefile|.gitmodules|.jenkins/caffe2|binaries|caffe|caffe2|cmake|conda|docker|docs/caffe2|modules|scripts|third_party)' | ||||
| @ -1,153 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) | ||||
| ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd) | ||||
| TEST_DIR=$ROOT_DIR/caffe2_tests | ||||
|  | ||||
| # Figure out which Python to use | ||||
| PYTHON="python" | ||||
| if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then | ||||
|   PYTHON="python${BASH_REMATCH[1]}" | ||||
| fi | ||||
|  | ||||
| # The prefix must mirror the setting from build.sh | ||||
| INSTALL_PREFIX="/usr/local/caffe2" | ||||
|  | ||||
| # Anaconda builds have a special install prefix and python | ||||
| if [[ "$BUILD_ENVIRONMENT" == conda* ]]; then | ||||
|   # This path comes from install_anaconda.sh which installs Anaconda into the | ||||
|   # docker image | ||||
|   PYTHON="/opt/conda/bin/python" | ||||
|   INSTALL_PREFIX="/opt/conda/" | ||||
| fi | ||||
|  | ||||
| # Add the site-packages in the caffe2 install prefix to the PYTHONPATH | ||||
| SITE_DIR=$($PYTHON -c "from distutils import sysconfig; print(sysconfig.get_python_lib(prefix=''))") | ||||
| INSTALL_SITE_DIR="${INSTALL_PREFIX}/${SITE_DIR}" | ||||
|  | ||||
| # Skip tests in environments where they are not built/applicable | ||||
| if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then | ||||
|   echo 'Skipping tests' | ||||
|   exit 0 | ||||
| fi | ||||
|  | ||||
| # Set PYTHONPATH and LD_LIBRARY_PATH so that python can find the installed | ||||
| # Caffe2. This shouldn't be done on Anaconda, as Anaconda should handle this. | ||||
| if [[ "$BUILD_ENVIRONMENT" != conda* ]]; then | ||||
|   export PYTHONPATH="${PYTHONPATH}:$INSTALL_SITE_DIR" | ||||
|   export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${INSTALL_PREFIX}/lib" | ||||
| fi | ||||
|  | ||||
| cd "$ROOT_DIR" | ||||
|  | ||||
| if [ -d $TEST_DIR ]; then | ||||
|   echo "Directory $TEST_DIR already exists; please remove it..." | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| mkdir -p $TEST_DIR/{cpp,python} | ||||
|  | ||||
| cd "${WORKSPACE}" | ||||
|  | ||||
| # C++ tests | ||||
| echo "Running C++ tests.." | ||||
| gtest_reports_dir="${TEST_DIR}/cpp" | ||||
| junit_reports_dir="${TEST_DIR}/junit_reports" | ||||
| mkdir -p "$gtest_reports_dir" "$junit_reports_dir" | ||||
| for test in $(find "${INSTALL_PREFIX}/test" -executable -type f); do | ||||
|   case "$test" in | ||||
|     # skip tests we know are hanging or bad | ||||
|     */mkl_utils_test|*/aten/integer_divider_test) | ||||
|       continue | ||||
|       ;; | ||||
|     */scalar_tensor_test|*/basic|*/native_test) | ||||
|       if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then | ||||
|         continue | ||||
|       else | ||||
|         "$test" | ||||
|       fi | ||||
|       ;; | ||||
|     *) | ||||
|       # Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While | ||||
|       # we plan to migrate to gtest as the common PyTorch C++ test suite, we | ||||
|       # currently do NOT use the XML test reporter, because Catch doesn't | ||||
|       # support multiple reporters | ||||
|       # (c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223), | ||||
|       # which means that enabling XML output loses the useful stdout | ||||
|       # output for Jenkins.  It's more important to have useful console | ||||
|       # output than it is to have XML output for Jenkins. | ||||
|       # Note: in the future, if we want to use xml test reporter once we switch | ||||
|       # to all gtest, one can simply do: | ||||
|       # "$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml" | ||||
|       "$test" | ||||
|       ;; | ||||
|   esac | ||||
| done | ||||
|  | ||||
| # Get the relative path to where the caffe2 python module was installed | ||||
| CAFFE2_PYPATH="$INSTALL_SITE_DIR/caffe2" | ||||
|  | ||||
| # Collect additional tests to run (outside caffe2/python) | ||||
| EXTRA_TESTS=() | ||||
|  | ||||
| # CUDA builds always include NCCL support | ||||
| if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]]; then | ||||
|   EXTRA_TESTS+=("$CAFFE2_PYPATH/contrib/nccl") | ||||
| fi | ||||
|  | ||||
| conda_ignore_test=() | ||||
| if [[ $BUILD_ENVIRONMENT == conda* ]]; then | ||||
|   # These tests both assume Caffe2 was built with leveldb, which is not the case | ||||
|   conda_ignore_test+=("--ignore $CAFFE2_PYPATH/python/dataio_test.py") | ||||
|   conda_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/checkpoint_test.py") | ||||
| fi | ||||
|  | ||||
| rocm_ignore_test=() | ||||
| if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then | ||||
|   export LANG=C.UTF-8 | ||||
|   export LC_ALL=C.UTF-8 | ||||
|  | ||||
|   # Currently these tests are failing on ROCM platform: | ||||
|  | ||||
|   # Unknown reasons, need to debug | ||||
|   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/arg_ops_test.py") | ||||
|   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/piecewise_linear_transform_test.py") | ||||
|   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/softmax_ops_test.py") | ||||
|   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/unique_ops_test.py") | ||||
|  | ||||
|   # Need to go through roi ops to replace max(...) with fmaxf(...) | ||||
|   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/roi_align_rotated_op_test.py") | ||||
|  | ||||
|   # Our CUDA top_k op has some asm code and the hipified version doesn't | ||||
|   # compile yet, so we don't have a top_k operator for now | ||||
|   rocm_ignore_test+=("--ignore $CAFFE2_PYPATH/python/operator_test/top_k_test.py") | ||||
|  | ||||
|   # Our AMD CI boxes have 4 GPUs each | ||||
|   # Remove this once we have added multi-gpu support | ||||
|   export HIP_VISIBLE_DEVICES=$(($BUILD_NUMBER % 4)) | ||||
| fi | ||||
|  | ||||
| # Python tests | ||||
| echo "Running Python tests.." | ||||
| "$PYTHON" \ | ||||
|   -m pytest \ | ||||
|   -x \ | ||||
|   -v \ | ||||
|   --junit-xml="$TEST_DIR/python/result.xml" \ | ||||
|   --ignore "$CAFFE2_PYPATH/python/test/executor_test.py" \ | ||||
|   --ignore "$CAFFE2_PYPATH/python/operator_test/matmul_op_test.py" \ | ||||
|   --ignore "$CAFFE2_PYPATH/python/operator_test/pack_ops_test.py" \ | ||||
|   --ignore "$CAFFE2_PYPATH/python/mkl/mkl_sbn_speed_test.py" \ | ||||
|   ${conda_ignore_test[@]} \ | ||||
|   ${rocm_ignore_test[@]} \ | ||||
|   "$CAFFE2_PYPATH/python" \ | ||||
|   "${EXTRA_TESTS[@]}" | ||||
|  | ||||
| cd ${INSTALL_PREFIX} | ||||
|  | ||||
| if [[ -n "$INTEGRATED" ]]; then | ||||
|   pip install --user torchvision | ||||
|   "$ROOT_DIR/scripts/onnx/test.sh" | ||||
| fi | ||||
| @ -1,42 +0,0 @@ | ||||
| This directory contains scripts for our continuous integration. | ||||
|  | ||||
| One important thing to keep in mind when reading the scripts here is | ||||
| that they are all based off of Docker images, which we build for each of | ||||
| the various system configurations we want to run on Jenkins.  This means | ||||
| it is very easy to run these tests yourself: | ||||
|  | ||||
| 1. Figure out what Docker image you want.  The general template for our | ||||
|    images looks like: | ||||
|    ``registry.pytorch.org/pytorch/pytorch-$BUILD_ENVIRONMENT:$DOCKER_VERSION``, | ||||
|    where ``$BUILD_ENVIRONMENT`` is one of the build environments | ||||
|    enumerated in | ||||
|    [pytorch-dockerfiles](https://github.com/pietern/pytorch-dockerfiles/blob/master/build.sh) | ||||
|  | ||||
| 2. Run ``docker run -it -u jenkins $DOCKER_IMAGE``, clone PyTorch and | ||||
|    run one of the scripts in this directory. | ||||
|  | ||||
| The Docker images are designed so that any "reasonable" build commands | ||||
| will work; if you look in [build.sh](build.sh) you will see that it is a | ||||
| very simple script.  This is intentional.  Idiomatic build instructions | ||||
| should work inside all of our Docker images.  You can tweak the commands | ||||
| however you need (e.g., in case you want to rebuild with DEBUG, or rerun | ||||
| the build with higher verbosity, etc.). | ||||
|  | ||||
| We have to do some work to make this so.  Here is a summary of the | ||||
| mechanisms we use: | ||||
|  | ||||
| - We install binaries to directories like `/usr/local/bin` which | ||||
|   are automatically part of your PATH. | ||||
|  | ||||
| - We add entries to the PATH using Docker ENV variables (so | ||||
|   they apply when you enter Docker) and `/etc/environment` (so they | ||||
|   continue to apply even if you sudo), instead of modifying | ||||
|   `PATH` in our build scripts. | ||||
|  | ||||
| - We use `/etc/ld.so.conf.d` to register directories containing | ||||
|   shared libraries, instead of modifying `LD_LIBRARY_PATH` in our | ||||
|   build scripts. | ||||
|  | ||||
| - We reroute well known paths like `/usr/bin/gcc` to alternate | ||||
|   implementations with `update-alternatives`, instead of setting | ||||
|   `CC` and `CXX` in our build scripts (see the sketch after this list). | ||||
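As a concrete illustration of the last point, a minimal sketch of rerouting `/usr/bin/gcc` with `update-alternatives`, assuming gcc-7 is installed:

```
# Register gcc-7 as an alternative for /usr/bin/gcc (priority 70),
# then select it explicitly instead of exporting CC/CXX.
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 70
sudo update-alternatives --set gcc /usr/bin/gcc-7
```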
| @ -1,21 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # Required environment variable: $BUILD_ENVIRONMENT | ||||
| # (This is set by default in the Docker images we build, so you don't | ||||
| # need to set it yourself.) | ||||
|  | ||||
| COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-build" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| echo "Clang version:" | ||||
| clang --version | ||||
|  | ||||
| # detect_leaks=0: Python is very leaky, so we need to suppress it | ||||
| # symbolize=1: Gives us much better errors when things go wrong | ||||
| export ASAN_OPTIONS=detect_leaks=0:symbolize=1 | ||||
|  | ||||
| # TODO: Make the ASAN flags a more unified env var | ||||
| CC="clang" CXX="clang++" LDSHARED="clang --shared" \ | ||||
|   CFLAGS="-fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all -shared-libasan" \ | ||||
|   NO_CUDA=1 \ | ||||
|   python setup.py install | ||||
| @ -1,145 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # For distributed, four environmental configs: | ||||
| # (1) build with only NCCL | ||||
| # (2) build with NCCL and MPI | ||||
| # (3) build with only MPI | ||||
| # (4) build with neither | ||||
| if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then | ||||
|   # TODO: move this to Docker | ||||
|   sudo apt-get update | ||||
|   sudo apt-get install -y --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0 | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda8-* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]] || [[ "$BUILD_ENVIRONMENT" == *-trusty-py2.7.9* ]]; then | ||||
|   # TODO: move this to Docker | ||||
|   sudo apt-get update | ||||
|   sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev | ||||
|   sudo apt-get install -y --no-install-recommends openssh-client openssh-server | ||||
|   sudo mkdir -p /var/run/sshd | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == "pytorch-linux-xenial-py3-clang5-asan" ]]; then | ||||
|   exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" $* | ||||
| fi | ||||
|  | ||||
| # Required environment variable: $BUILD_ENVIRONMENT | ||||
| # (This is set by default in the Docker images we build, so you don't | ||||
| # need to set it yourself.) | ||||
|  | ||||
| COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-build" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| echo "Python version:" | ||||
| python --version | ||||
|  | ||||
| echo "GCC version:" | ||||
| gcc --version | ||||
|  | ||||
| echo "CMake version:" | ||||
| cmake --version | ||||
|  | ||||
| # TODO: Don't run this... | ||||
| pip install -r requirements.txt || true | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then | ||||
|   # This is necessary in order to cross-compile (otherwise the GPU device will be missing). | ||||
|   export HCC_AMDGPU_TARGET=gfx900 | ||||
|  | ||||
|   # These environment variables are not set on CI when running as the Jenkins user. | ||||
|   # The HIP utility scripts require these environment variables to be set in order to run without error. | ||||
|   export LANG=C.UTF-8 | ||||
|   export LC_ALL=C.UTF-8 | ||||
|  | ||||
|   # This environment variable enables HCC optimizations that speed up the linking stage. | ||||
|   # https://github.com/RadeonOpenCompute/hcc#hcc-with-thinlto-linking | ||||
|   export KMTHINLTO=1 | ||||
|  | ||||
|   # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime | ||||
|   sudo apt-get install libc++1 | ||||
|   sudo apt-get install libc++abi1 | ||||
|  | ||||
|   python tools/amd_build/build_pytorch_amd.py | ||||
|   python tools/amd_build/build_caffe2_amd.py | ||||
|   USE_ROCM=1 python setup.py install --user | ||||
|   exit 0 | ||||
| fi | ||||
|  | ||||
| # TODO: Don't install this here | ||||
| if ! which conda; then | ||||
|   pip install mkl mkl-devel | ||||
| fi | ||||
|  | ||||
| # sccache will fail for CUDA builds if all cores are used for compiling | ||||
| # gcc 7 with sccache seems to have an intermittent OOM issue if all cores are used | ||||
| if [ -z "$MAX_JOBS" ]; then | ||||
|   if ([[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]) && which sccache > /dev/null; then | ||||
|     export MAX_JOBS=`expr $(nproc) - 1` | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| # Target only our CI GPU machine's CUDA arch to speed up the build | ||||
| export TORCH_CUDA_ARCH_LIST="5.2" | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *ppc64le* ]]; then | ||||
|   export TORCH_CUDA_ARCH_LIST="6.0" | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *trusty-py3.6-gcc5.4* ]]; then | ||||
|   export DEBUG=1 | ||||
| fi | ||||
|  | ||||
| # The ppc64le build fails when WERROR=1, | ||||
| # so set WERROR only when building other architectures; | ||||
| # it is only used for the "python setup.py install" line. | ||||
| if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then | ||||
|   WERROR=1 python setup.py install | ||||
| elif [[ "$BUILD_ENVIRONMENT" == *ppc64le* ]]; then | ||||
|   python setup.py install | ||||
| fi | ||||
|  | ||||
|  | ||||
| # Add the test binaries so that they won't be git clean'ed away | ||||
| git add -f build/bin | ||||
|  | ||||
| # Test C FFI plugins | ||||
| # cffi install doesn't work for Python 3.7 | ||||
| if [[ "$BUILD_ENVIRONMENT" != *pynightly* ]]; then | ||||
|   # TODO: Don't run this here | ||||
|   pip install cffi | ||||
|   git clone https://github.com/pytorch/extension-ffi.git | ||||
|   pushd extension-ffi/script | ||||
|   python build.py | ||||
|   popd | ||||
| fi | ||||
|  | ||||
| # Test documentation build | ||||
| if [[ "$BUILD_ENVIRONMENT" == *xenial-cuda8-cudnn6-py3* ]]; then | ||||
|   pushd docs | ||||
|   # TODO: Don't run this here | ||||
|   pip install -r requirements.txt || true | ||||
|   LC_ALL=C make html | ||||
|   popd | ||||
| fi | ||||
|  | ||||
| # Test no-Python build | ||||
| if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then | ||||
|   echo "Building libtorch" | ||||
|   # NB: Install outside of source directory (at the same level as the root | ||||
|   # pytorch folder) so that it doesn't get cleaned away prior to docker push. | ||||
|   BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py | ||||
|   mkdir -p ../cpp-build/caffe2 | ||||
|   pushd ../cpp-build/caffe2 | ||||
|   WERROR=1 VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY | ||||
|   popd | ||||
|  | ||||
|   # Build custom operator tests. | ||||
|   CUSTOM_OP_BUILD="$PWD/../custom-op-build" | ||||
|   CUSTOM_OP_TEST="$PWD/test/custom_operator" | ||||
|   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" | ||||
|   mkdir "$CUSTOM_OP_BUILD" | ||||
|   pushd "$CUSTOM_OP_BUILD" | ||||
|   CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" cmake "$CUSTOM_OP_TEST" | ||||
|   make VERBOSE=1 | ||||
|   popd | ||||
| fi | ||||
| @ -1,140 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # Common setup for all Jenkins scripts | ||||
|  | ||||
| # NB: define this function before set -x, so that we don't | ||||
| # pollute the log with a premature EXITED_USER_LAND ;) | ||||
| function cleanup { | ||||
|   # Note that if you've exited user land, then CI will conclude that | ||||
|   # any failure is the CI's fault.  So we MUST only output this | ||||
|   # string when the script actually succeeded. | ||||
|   retcode=$? | ||||
|   set +x | ||||
|   if [ $retcode -eq 0 ]; then | ||||
|     echo "EXITED_USER_LAND" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # Required environment variables: | ||||
| #   $BUILD_ENVIRONMENT (should be set by your Docker image) | ||||
|  | ||||
| # This token is used by a parser on Jenkins logs for determining | ||||
| # if a failure is a legitimate problem, or a problem with the build | ||||
| # system; to find out more, grep for this string in ossci-job-dsl. | ||||
| echo "ENTERED_USER_LAND" | ||||
|  | ||||
| # compositional trap taken from https://stackoverflow.com/a/7287873/23845 | ||||
|  | ||||
| # note: printf is used instead of echo to avoid backslash | ||||
| # processing and to properly handle values that begin with a '-'. | ||||
|  | ||||
| log() { printf '%s\n' "$*"; } | ||||
| error() { log "ERROR: $*" >&2; } | ||||
| fatal() { error "$@"; exit 1; } | ||||
|  | ||||
| # appends a command to a trap | ||||
| # | ||||
| # - 1st arg:  code to add | ||||
| # - remaining args:  names of traps to modify | ||||
| # | ||||
| trap_add() { | ||||
|     trap_add_cmd=$1; shift || fatal "${FUNCNAME} usage error" | ||||
|     for trap_add_name in "$@"; do | ||||
|         trap -- "$( | ||||
|             # helper fn to get existing trap command from output | ||||
|             # of trap -p | ||||
|             extract_trap_cmd() { printf '%s\n' "$3"; } | ||||
|             # print existing trap command with newline | ||||
|             eval "extract_trap_cmd $(trap -p "${trap_add_name}")" | ||||
|             # print the new trap command | ||||
|             printf '%s\n' "${trap_add_cmd}" | ||||
|         )" "${trap_add_name}" \ | ||||
|             || fatal "unable to add to trap ${trap_add_name}" | ||||
|     done | ||||
| } | ||||
| # set the trace attribute for the above function.  this is | ||||
| # required to modify DEBUG or RETURN traps because functions don't | ||||
| # inherit them unless the trace attribute is set | ||||
| declare -f -t trap_add | ||||
|  | ||||
| trap_add cleanup EXIT | ||||
|  | ||||
| if which sccache > /dev/null; then | ||||
|   # Save sccache logs to file | ||||
|   sccache --stop-server || true | ||||
|   rm ~/sccache_error.log || true | ||||
|   SCCACHE_ERROR_LOG=~/sccache_error.log RUST_LOG=sccache::server=error sccache --start-server | ||||
|  | ||||
|   # Report sccache stats for easier debugging | ||||
|   sccache --zero-stats | ||||
|   function sccache_epilogue() { | ||||
|     echo '=================== sccache compilation log ===================' | ||||
|     python $(dirname "${BASH_SOURCE[0]}")/print_sccache_log.py ~/sccache_error.log | ||||
|     echo '=========== If your build fails, please take a look at the log above for possible reasons ===========' | ||||
|     sccache --show-stats | ||||
|     sccache --stop-server || true | ||||
|   } | ||||
|   trap_add sccache_epilogue EXIT | ||||
| fi | ||||
|  | ||||
| if which ccache > /dev/null; then | ||||
|   # Report ccache stats for easier debugging | ||||
|   ccache --zero-stats | ||||
|   ccache --show-stats | ||||
|   function ccache_epilogue() { | ||||
|     ccache --show-stats | ||||
|   } | ||||
|   trap_add ccache_epilogue EXIT | ||||
| fi | ||||
|  | ||||
| # It's called a COMPACT_JOB_NAME because it's distinct from the | ||||
| # Jenkins-provided JOB_NAME, which also includes a prefix folder, | ||||
| # e.g. pytorch-builds/ | ||||
|  | ||||
| if [ -z "$COMPACT_JOB_NAME" ]; then | ||||
|   echo "Jenkins build scripts must set COMPACT_JOB_NAME" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| if grep --line-regexp -q "$COMPACT_JOB_NAME" "$(dirname "${BASH_SOURCE[0]}")/disabled-configs.txt"; then | ||||
|   echo "Job is explicitly disabled, SKIPPING" | ||||
|   exit 0 | ||||
| else | ||||
|   echo "Job is not disabled, proceeding" | ||||
| fi | ||||
|  | ||||
| if grep --line-regexp -q "$COMPACT_JOB_NAME" "$(dirname "${BASH_SOURCE[0]}")/enabled-configs.txt"; then | ||||
|   echo "Job is enabled, proceeding" | ||||
| else | ||||
|   echo "Job is not enabled, FAILING now (revert changes to enabled-configs.txt to fix this)" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-xenial-cuda9-cudnn7-py3 ]] || \ | ||||
|    [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-trusty-py3.6-gcc7* ]]; then | ||||
|   BUILD_TEST_LIBTORCH=1 | ||||
| else | ||||
|   BUILD_TEST_LIBTORCH=0 | ||||
| fi | ||||
|  | ||||
| # Use conda cmake in some CI builds. Conda's cmake will be newer than our supported | ||||
| # min version 3.5, so we only do it in the two builds that we know should use conda. | ||||
| if [[ "$BUILD_ENVIRONMENT" == *pytorch-linux-xenial-cuda* ]]; then | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *cuda8-cudnn6-py2* ]] || \ | ||||
|      [[ "$BUILD_ENVIRONMENT" == *cuda9-cudnn7-py3* ]]; then | ||||
|     if ! which conda; then | ||||
|       echo "Expected ${BUILD_ENVIRONMENT} to use conda, but 'which conda' returns empty" | ||||
|       exit 1 | ||||
|     else | ||||
|       conda install -q -y cmake | ||||
|     fi | ||||
|   else | ||||
|     if ! cmake --version | grep 'cmake version 3\.5'; then | ||||
|       echo "Expected ${BUILD_ENVIRONMENT} to have cmake version 3.5.* (min support version), but 'cmake --version' returns:" | ||||
|       cmake --version | ||||
|       exit 1 | ||||
|     fi | ||||
|   fi | ||||
| fi | ||||
| @ -1,10 +0,0 @@ | ||||
| #!/bin/bash | ||||
| set -ex | ||||
| upstream="$1" | ||||
| pr="$2" | ||||
| git diff --name-only "$upstream" "$pr" | ||||
| # Now that PyTorch build depends on Caffe2, unconditionally trigger | ||||
| # for any changes. | ||||
| # TODO: Replace this with a NEGATIVE regex that allows us to blacklist | ||||
| # files (letting us skip builds when they are unnecessary) | ||||
| #git diff --name-only "$upstream" "$pr" | grep -Eq '^(aten/|caffe2/|.jenkins/pytorch|docs/(make.bat|Makefile|requirements.txt|source)|mypy|requirements.txt|setup.py|test/|third_party/|tools/|\.gitmodules|torch/)' | ||||
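A minimal sketch of the negative regex the TODO above asks for, with a purely illustrative skip list of documentation-only paths:

```
# Trigger only when some changed file falls outside the skip list:
# -E enables extended regexes, -v inverts the match, -q only sets
# the exit status.
if git diff --name-only "$upstream" "$pr" | grep -Evq '^(docs/|README\.md$)'; then
  echo "changes outside the skip list; trigger the build"
fi
```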
| @ -1,5 +0,0 @@ | ||||
| # This file contains a list of disabled configurations.  Disabled | ||||
| # configurations are skipped and not considered a failure if they | ||||
| # fail.  You can use this to temporarily reserve a test name to | ||||
| # enable on the CI side before the PyTorch repository supports it.  This | ||||
| # file has the same format as .jenkins/enabled-configs.txt | ||||
| @ -1,6 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME="docker-build-test" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| docker build -t pytorch . | ||||
| @ -1,48 +0,0 @@ | ||||
| # This file contains a list of enabled configurations | ||||
| # to perform tests on.  If you want CI to run only a limited | ||||
| # set of tests before enabling the full test suite, | ||||
| # you can delete lines from this file.  Any test that is not | ||||
| # in this file will report a failure (so you don't forget to | ||||
| # re-enable the tests on merge ;) | ||||
|  | ||||
| pytorch-linux-xenial-cuda8-cudnn6-py3-build | ||||
| pytorch-linux-xenial-cuda8-cudnn6-py3-test | ||||
| pytorch-linux-xenial-cuda8-cudnn6-py3-multigpu-test | ||||
| pytorch-linux-xenial-cuda9-cudnn7-py2-build | ||||
| pytorch-linux-xenial-cuda9-cudnn7-py2-test | ||||
| pytorch-linux-xenial-cuda9-cudnn7-py3-build | ||||
| pytorch-linux-xenial-cuda9-cudnn7-py3-test | ||||
| pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-build | ||||
| pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7-test | ||||
| pytorch-linux-xenial-py3-clang5-asan-build | ||||
| pytorch-linux-xenial-py3-clang5-asan-test | ||||
| pytorch-linux-trusty-py2.7.9-build | ||||
| pytorch-linux-trusty-py2.7.9-test | ||||
| pytorch-linux-trusty-py2.7-build | ||||
| pytorch-linux-trusty-py2.7-test | ||||
| pytorch-linux-trusty-py3.5-build | ||||
| pytorch-linux-trusty-py3.5-test | ||||
| pytorch-linux-trusty-py3.6-gcc4.8-build | ||||
| pytorch-linux-trusty-py3.6-gcc4.8-test | ||||
| pytorch-linux-trusty-py3.6-gcc5.4-build | ||||
| pytorch-linux-trusty-py3.6-gcc5.4-test | ||||
| pytorch-linux-trusty-py3.6-gcc7.2-build | ||||
| pytorch-linux-trusty-py3.6-gcc7.2-test | ||||
| pytorch-linux-trusty-py3.6-gcc7-build | ||||
| pytorch-linux-trusty-py3.6-gcc7-test | ||||
| pytorch-linux-trusty-pynightly-build | ||||
| pytorch-linux-trusty-pynightly-test | ||||
| pytorch-win-ws2016-cuda9-cudnn7-py3-build | ||||
| pytorch-win-ws2016-cuda9-cudnn7-py3-test | ||||
| pytorch-macos-10.13-py3-build | ||||
| pytorch-macos-10.13-py3-test | ||||
| pytorch-macos-10.13-cuda9.2-cudnn7-py3-build | ||||
| pytorch-docker-build-test | ||||
| short-perf-test-cpu | ||||
| short-perf-test-gpu | ||||
| py2-clang3.8-rocm1.7.1-ubuntu16.04-build | ||||
| py2-clang3.8-rocm1.7.1-ubuntu16.04-test | ||||
| pytorch-ppc64le-cuda9.2-cudnn7-py3-build | ||||
| pytorch-ppc64le-cuda9.2-cudnn7-py3-test | ||||
| pytorch-ppc64le-cuda9.1-cudnn7-py3-build | ||||
| pytorch-ppc64le-cuda9.1-cudnn7-py3-test | ||||
| @ -1,9 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-build* ]]; then | ||||
|   source "$(dirname "${BASH_SOURCE[0]}")/macos-build.sh" | ||||
| fi | ||||
|  | ||||
| if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test* ]]; then | ||||
|   source "$(dirname "${BASH_SOURCE[0]}")/macos-test.sh" | ||||
| fi | ||||
| @ -1,72 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-build" | ||||
| export PATH="/usr/local/bin:$PATH" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| # Set up conda environment | ||||
| export PYTORCH_ENV_DIR="${HOME}/pytorch-ci-env" | ||||
| # If a local installation of conda doesn't exist, we download and install conda | ||||
| if [ ! -d "${PYTORCH_ENV_DIR}/miniconda3" ]; then | ||||
|   mkdir -p ${PYTORCH_ENV_DIR} | ||||
|   curl https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o ${PYTORCH_ENV_DIR}/miniconda3.sh | ||||
|   bash ${PYTORCH_ENV_DIR}/miniconda3.sh -b -p ${PYTORCH_ENV_DIR}/miniconda3 | ||||
| fi | ||||
| export PATH="${PYTORCH_ENV_DIR}/miniconda3/bin:$PATH" | ||||
| source ${PYTORCH_ENV_DIR}/miniconda3/bin/activate | ||||
| conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja | ||||
| rm -rf ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch* | ||||
|  | ||||
| git submodule update --init --recursive | ||||
| export CMAKE_PREFIX_PATH=${PYTORCH_ENV_DIR}/miniconda3/ | ||||
|  | ||||
| # Build PyTorch | ||||
| if [[ "${JOB_BASE_NAME}" == *cuda9.2* ]]; then | ||||
|   export CUDA_VERSION=9.2 | ||||
|   export TORCH_CUDA_ARCH_LIST=5.2 | ||||
|   export PATH=/Developer/NVIDIA/CUDA-${CUDA_VERSION}/bin${PATH:+:${PATH}} | ||||
|   export DYLD_LIBRARY_PATH=/Developer/NVIDIA/CUDA-${CUDA_VERSION}/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}} | ||||
|   export CUDA_HOME=/Developer/NVIDIA/CUDA-${CUDA_VERSION} | ||||
|   export NO_CUDA=0 | ||||
|  | ||||
|   if [ -z "${IN_CIRCLECI}" ]; then | ||||
|     # Eigen gives "explicit specialization of class must precede its first use" error | ||||
|     # when compiling with Xcode 9.1 toolchain, so we have to use Xcode 8.2 toolchain instead. | ||||
|     export DEVELOPER_DIR=/Library/Developer/CommandLineTools | ||||
|   fi | ||||
| else | ||||
|   if [ -z "${IN_CIRCLECI}" ]; then | ||||
|     export DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| export MACOSX_DEPLOYMENT_TARGET=10.9 | ||||
| export CXX=clang++ | ||||
| export CC=clang | ||||
| if which sccache > /dev/null; then | ||||
|   printf "#!/bin/sh\nexec sccache $(which clang++) \$*" > "${PYTORCH_ENV_DIR}/clang++" | ||||
|   chmod a+x "${PYTORCH_ENV_DIR}/clang++" | ||||
|  | ||||
|   printf "#!/bin/sh\nexec sccache $(which clang) \$*" > "${PYTORCH_ENV_DIR}/clang" | ||||
|   chmod a+x "${PYTORCH_ENV_DIR}/clang" | ||||
|  | ||||
|   if [[ "${JOB_BASE_NAME}" == *cuda* ]]; then | ||||
|     printf "#!/bin/sh\nexec sccache $(which nvcc) \$*" > "${PYTORCH_ENV_DIR}/nvcc" | ||||
|     chmod a+x "${PYTORCH_ENV_DIR}/nvcc" | ||||
|     export CUDA_NVCC_EXECUTABLE="${PYTORCH_ENV_DIR}/nvcc" | ||||
|   fi | ||||
|  | ||||
|   export PATH="${PYTORCH_ENV_DIR}:$PATH" | ||||
| fi | ||||
| # If we run too many parallel jobs, we will OOM | ||||
| export MAX_JOBS=2 | ||||
|  | ||||
| export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID} | ||||
|  | ||||
| python setup.py install | ||||
|  | ||||
| # Upload torch binaries when the build job is finished | ||||
| if [ -z "${IN_CIRCLECI}" ]; then | ||||
|   7z a ${IMAGE_COMMIT_TAG}.7z ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch* | ||||
|   aws s3 cp ${IMAGE_COMMIT_TAG}.7z s3://ossci-macos-build/pytorch/${IMAGE_COMMIT_TAG}.7z --acl public-read | ||||
| fi | ||||
| @ -1,112 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-test" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| export PATH="/usr/local/bin:$PATH" | ||||
|  | ||||
| # Set up conda environment | ||||
| export PYTORCH_ENV_DIR="${HOME}/pytorch-ci-env" | ||||
| # If a local installation of conda doesn't exist, we download and install conda | ||||
| if [ ! -d "${PYTORCH_ENV_DIR}/miniconda3" ]; then | ||||
|   mkdir -p ${PYTORCH_ENV_DIR} | ||||
|   curl https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o ${PYTORCH_ENV_DIR}/miniconda3.sh | ||||
|   bash ${PYTORCH_ENV_DIR}/miniconda3.sh -b -p ${PYTORCH_ENV_DIR}/miniconda3 | ||||
| fi | ||||
| export PATH="${PYTORCH_ENV_DIR}/miniconda3/bin:$PATH" | ||||
| source ${PYTORCH_ENV_DIR}/miniconda3/bin/activate | ||||
| conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja | ||||
| if [ -z "${IN_CIRCLECI}" ]; then | ||||
|   rm -rf ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch* | ||||
| fi | ||||
|  | ||||
| git submodule update --init --recursive | ||||
| export CMAKE_PREFIX_PATH=${PYTORCH_ENV_DIR}/miniconda3/ | ||||
|  | ||||
| # Test PyTorch | ||||
| if [ -z "${IN_CIRCLECI}" ]; then | ||||
|   if [[ "${JOB_BASE_NAME}" == *cuda9.2* ]]; then | ||||
|     # Eigen gives "explicit specialization of class must precede its first use" error | ||||
|     # when compiling with Xcode 9.1 toolchain, so we have to use Xcode 8.2 toolchain instead. | ||||
|     export DEVELOPER_DIR=/Library/Developer/CommandLineTools | ||||
|   else | ||||
|     export DEVELOPER_DIR=/Applications/Xcode9.app/Contents/Developer | ||||
|   fi | ||||
| fi | ||||
| export MACOSX_DEPLOYMENT_TARGET=10.9 | ||||
| export CXX=clang++ | ||||
| export CC=clang | ||||
| # If we run too many parallel jobs, we will OOM | ||||
| export MAX_JOBS=2 | ||||
|  | ||||
| export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID} | ||||
|  | ||||
| # Download torch binaries in the test jobs | ||||
| if [ -z "${IN_CIRCLECI}" ]; then | ||||
|   rm -rf ${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages/torch* | ||||
|   aws s3 cp s3://ossci-macos-build/pytorch/${IMAGE_COMMIT_TAG}.7z ${IMAGE_COMMIT_TAG}.7z | ||||
|   7z x ${IMAGE_COMMIT_TAG}.7z -o"${PYTORCH_ENV_DIR}/miniconda3/lib/python3.6/site-packages" | ||||
| fi | ||||
|  | ||||
| test_python_all() { | ||||
|   echo "Ninja version: $(ninja --version)" | ||||
|   python test/run_test.py --verbose | ||||
| } | ||||
|  | ||||
| test_cpp_api() { | ||||
|   # C++ API | ||||
|  | ||||
|   # NB: Install outside of source directory (at the same level as the root | ||||
|   # pytorch folder) so that it doesn't get cleaned away prior to docker push. | ||||
|   # But still clean it before we perform our own build. | ||||
|   # | ||||
|   CPP_BUILD="$PWD/../cpp-build" | ||||
|   rm -rf $CPP_BUILD | ||||
|   mkdir -p $CPP_BUILD/caffe2 | ||||
|  | ||||
|   BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py | ||||
|   pushd $CPP_BUILD/caffe2 | ||||
|   VERBOSE=1 DEBUG=1 python $BUILD_LIBTORCH_PY | ||||
|   popd | ||||
|  | ||||
|   python tools/download_mnist.py --quiet -d test/cpp/api/mnist | ||||
|  | ||||
|   # Unfortunately it seems like the test can't load from miniconda3 | ||||
|   # without these paths being set | ||||
|   export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$PWD/miniconda3/lib" | ||||
|   export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD/miniconda3/lib" | ||||
|   "$CPP_BUILD"/caffe2/bin/test_api | ||||
| } | ||||
|  | ||||
| test_custom_script_ops() { | ||||
|   echo "Testing custom script operators" | ||||
|   pushd test/custom_operator | ||||
|   # Build the custom operator library. | ||||
|   rm -rf build && mkdir build | ||||
|   pushd build | ||||
|   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" | ||||
|   CMAKE_PREFIX_PATH="$SITE_PACKAGES/torch" cmake .. | ||||
|   make VERBOSE=1 | ||||
|   popd | ||||
|  | ||||
|   # Run tests Python-side and export a script module. | ||||
|   python test_custom_ops.py -v | ||||
|   python model.py --export-script-module=model.pt | ||||
|   # Run tests C++-side and load the exported script module. | ||||
|   build/test_custom_ops ./model.pt | ||||
|   popd | ||||
| } | ||||
|  | ||||
|  | ||||
| if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then | ||||
|   test_python_all | ||||
|   test_cpp_api | ||||
|   test_custom_script_ops | ||||
| else | ||||
|   if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then | ||||
|     test_python_all | ||||
|   elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then | ||||
|     test_cpp_api | ||||
|     test_custom_script_ops | ||||
|   fi | ||||
| fi | ||||
| @ -1,28 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # Required environment variable: $BUILD_ENVIRONMENT | ||||
| # (This is set by default in the Docker images we build, so you don't | ||||
| # need to set it yourself.) | ||||
|  | ||||
| COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-multigpu-test" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| echo "Testing pytorch (distributed only)" | ||||
|  | ||||
| if [ -n "${IN_CIRCLECI}" ]; then | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then | ||||
|     # TODO: move this to Docker | ||||
|     sudo apt-get update | ||||
|     sudo apt-get install -y --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0 | ||||
|   fi | ||||
|  | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda8-* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]]; then | ||||
|     # TODO: move this to Docker | ||||
|     sudo apt-get update | ||||
|     sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev | ||||
|     sudo apt-get install -y --no-install-recommends openssh-client openssh-server | ||||
|     sudo mkdir -p /var/run/sshd | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| time python test/run_test.py --verbose -i distributed | ||||
| @ -1,21 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| run_test () { | ||||
|   rm -rf test_tmp/ && mkdir test_tmp/ && cd test_tmp/ | ||||
|   "$@" | ||||
|   cd .. && rm -rf test_tmp/ | ||||
| } | ||||
|  | ||||
| get_runtime_of_command () { | ||||
|   # %R makes `time` print only elapsed real time, in seconds. | ||||
|   TIMEFORMAT=%R | ||||
|  | ||||
|   # Discard the command's stdout and capture the timing output (which | ||||
|   # `time` writes to stderr), so that only the runtime is captured. | ||||
|   runtime=$( { time $@; } 2>&1 1>/dev/null) | ||||
|   if [[ $runtime == *"Error"* ]]; then | ||||
|     exit 1 | ||||
|   fi | ||||
|   # Strip the xtrace prefix that `set -x` adds to the captured output. | ||||
|   runtime=${runtime#+++ $@} | ||||
|   runtime=$(python -c "print($runtime)") | ||||
|  | ||||
|   echo $runtime | ||||
| } | ||||
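The helper above leans on two bash idioms: TIMEFORMAT=%R makes `time` print only elapsed real time, and the `2>&1 1>/dev/null` shuffle keeps the timing output (stderr) while dropping the command's own stdout. For readers less familiar with them, a rough Python equivalent (a sketch assuming Python 3.5+, not part of the CI):

import subprocess
import sys
import time

def get_runtime_of_command(*cmd):
    # Run the command, discard its stdout, and return elapsed wall-clock
    # seconds -- the same contract as the bash helper above.
    start = time.monotonic()
    result = subprocess.run(cmd, stdout=subprocess.DEVNULL)
    if result.returncode != 0:
        sys.exit(1)
    return time.monotonic() - start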
| @ -1,66 +0,0 @@ | ||||
| import argparse | ||||
| import json | ||||
| import sys | ||||
|  | ||||
| parser = argparse.ArgumentParser() | ||||
| parser.add_argument('--test-name', dest='test_name', action='store', | ||||
|                     required=True, help='test name') | ||||
| parser.add_argument('--sample-stats', dest='sample_stats', action='store', | ||||
|                     required=True, help='stats from sample') | ||||
| parser.add_argument('--update', action='store_true', | ||||
|                     help='whether to update baseline using stats from sample') | ||||
| args = parser.parse_args() | ||||
|  | ||||
| test_name = args.test_name | ||||
|  | ||||
| if 'cpu' in test_name: | ||||
|     backend = 'cpu' | ||||
| elif 'gpu' in test_name: | ||||
|     backend = 'gpu' | ||||
| else: | ||||
|     # Fail loudly instead of hitting a NameError on `backend` below. | ||||
|     raise RuntimeError('test name must contain "cpu" or "gpu": ' + test_name) | ||||
|  | ||||
| data_file_path = '../{}_runtime.json'.format(backend) | ||||
|  | ||||
| with open(data_file_path) as data_file: | ||||
|     data = json.load(data_file) | ||||
|  | ||||
| if test_name in data: | ||||
|     mean = float(data[test_name]['mean']) | ||||
|     sigma = float(data[test_name]['sigma']) | ||||
| else: | ||||
|     # Let the test pass if baseline number doesn't exist | ||||
|     mean = sys.maxsize | ||||
|     sigma = 0.001 | ||||
|  | ||||
| print("population mean: ", mean) | ||||
| print("population sigma: ", sigma) | ||||
|  | ||||
| sample_stats_data = json.loads(args.sample_stats) | ||||
|  | ||||
| sample_mean = sample_stats_data['mean'] | ||||
| sample_sigma = sample_stats_data['sigma'] | ||||
|  | ||||
| print("sample mean: ", sample_mean) | ||||
| print("sample sigma: ", sample_sigma) | ||||
|  | ||||
| z_value = (sample_mean - mean) / sigma | ||||
|  | ||||
| print("z-value: ", z_value) | ||||
|  | ||||
| if z_value >= 3: | ||||
|     raise Exception('''\n | ||||
| z-value >= 3, there is a high chance of a perf regression.\n | ||||
| To reproduce this regression, run `cd .jenkins/pytorch/perf_test/ && bash ''' + test_name + '''.sh` on your local machine and compare the runtime before/after your code change. | ||||
| ''') | ||||
| else: | ||||
|     print("z-value < 3, no perf regression detected.") | ||||
|     if args.update: | ||||
|         print("We will use these numbers as new baseline.") | ||||
|         new_data_file_path = '../new_{}_runtime.json'.format(backend) | ||||
|         with open(new_data_file_path) as new_data_file: | ||||
|             new_data = json.load(new_data_file) | ||||
|         new_data[test_name] = {} | ||||
|         new_data[test_name]['mean'] = sample_mean | ||||
|         # Floor sigma at 10% of the mean so a near-zero sample sigma | ||||
|         # doesn't make future z-tests overly sensitive. | ||||
|         new_data[test_name]['sigma'] = max(sample_sigma, sample_mean * 0.1) | ||||
|         with open(new_data_file_path, 'w') as new_data_file: | ||||
|             json.dump(new_data, new_data_file, indent=4) | ||||
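To make the pass/fail rule concrete, a worked example with made-up numbers (illustration only, not real baseline data):

mean, sigma = 10.0, 0.5     # hypothetical baseline runtime stats, in seconds
for sample_mean in (12.0, 10.8):
    z = (sample_mean - mean) / sigma
    # 12.0s gives z = 4.0 (>= 3, flagged as a regression);
    # 10.8s gives z = 1.6 (< 3, passes).
    print(sample_mean, z, 'regression' if z >= 3 else 'ok')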
| @ -1,16 +0,0 @@ | ||||
| import sys | ||||
| import json | ||||
| import numpy | ||||
|  | ||||
| sample_data_list = sys.argv[1:] | ||||
| sample_data_list = [float(v.strip()) for v in sample_data_list] | ||||
|  | ||||
| sample_mean = numpy.mean(sample_data_list) | ||||
| sample_sigma = numpy.std(sample_data_list) | ||||
|  | ||||
| data = { | ||||
|     'mean': sample_mean, | ||||
|     'sigma': sample_sigma, | ||||
| } | ||||
|  | ||||
| print(json.dumps(data)) | ||||
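Note that numpy.std defaults to the population standard deviation (ddof=0), which matches the "population sigma" wording in compare_with_baseline.py. A quick illustration of the JSON handoff between the two scripts, with hypothetical runtimes:

import json
import numpy

samples = [10.1, 10.3, 9.9]   # hypothetical per-run times, in seconds
stats = {'mean': numpy.mean(samples), 'sigma': numpy.std(samples)}
# Prints {"mean": 10.1, "sigma": 0.163...}; this string is what the perf
# scripts pass to compare_with_baseline.py as --sample-stats.
print(json.dumps(stats))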
| @ -1,42 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_cpu_speed_mini_sequence_labeler () { | ||||
|   echo "Testing: mini sequence labeler, CPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   git checkout 726567a455edbfda6199445922a8cfee82535664 | ||||
|  | ||||
|   cd scripts/mini_sequence_labeler | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py) | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_mini_sequence_labeler "$@" | ||||
| fi | ||||
| @ -1,44 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_cpu_speed_mnist () { | ||||
|   echo "Testing: MNIST, CPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/examples.git -b perftests | ||||
|  | ||||
|   cd examples/mnist | ||||
|  | ||||
|   pip install -r requirements.txt | ||||
|  | ||||
|   # Download data | ||||
|   python main.py --epochs 0 | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log) | ||||
|     echo $runtime | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_mnist "$@" | ||||
| fi | ||||
| @ -1,28 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_cpu_speed_torch () { | ||||
|   echo "Testing: torch.*, CPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/yf225/perf-tests.git | ||||
|  | ||||
|   if [ "$1" == "compare_with_baseline" ]; then | ||||
|     export ARGS="--compare ../cpu_runtime.json" | ||||
|   elif [ "$1" == "compare_and_update" ]; then | ||||
|     export ARGS="--compare ../cpu_runtime.json --update ../new_cpu_runtime.json" | ||||
|   elif [ "$1" == "update_only" ]; then | ||||
|     export ARGS="--update ../new_cpu_runtime.json" | ||||
|   fi | ||||
|  | ||||
|   if ! python perf-tests/modules/test_cpu_torch.py ${ARGS}; then | ||||
|     echo "To reproduce this regression, run \`cd .jenkins/pytorch/perf_test/ && bash "${FUNCNAME[0]}".sh\` on your local machine and compare the runtime before/after your code change." | ||||
|     exit 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_torch "$@" | ||||
| fi | ||||
|  | ||||
| @ -1,28 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_cpu_speed_torch_tensor () { | ||||
|   echo "Testing: torch.Tensor.*, CPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/yf225/perf-tests.git | ||||
|  | ||||
|   if [ "$1" == "compare_with_baseline" ]; then | ||||
|     export ARGS="--compare ../cpu_runtime.json" | ||||
|   elif [ "$1" == "compare_and_update" ]; then | ||||
|     export ARGS="--compare ../cpu_runtime.json --update ../new_cpu_runtime.json" | ||||
|   elif [ "$1" == "update_only" ]; then | ||||
|     export ARGS="--update ../new_cpu_runtime.json" | ||||
|   fi | ||||
|  | ||||
|   if ! python perf-tests/modules/test_cpu_torch_tensor.py ${ARGS}; then | ||||
|     echo "To reproduce this regression, run \`cd .jenkins/pytorch/perf_test/ && bash "${FUNCNAME[0]}".sh\` on your local machine and compare the runtime before/after your code change." | ||||
|     exit 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_torch_tensor "$@" | ||||
| fi | ||||
|  | ||||
| @ -1,43 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_gpu_speed_cudnn_lstm () { | ||||
|   echo "Testing: CuDNN LSTM, GPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0 | ||||
|  | ||||
|   cd scripts/ | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python cudnn_lstm.py --skip-cpu-governor-check) | ||||
|     echo $runtime | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_cudnn_lstm "$@" | ||||
| fi | ||||
| @ -1,43 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_gpu_speed_lstm () { | ||||
|   echo "Testing: LSTM, GPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0 | ||||
|  | ||||
|   cd scripts/ | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python lstm.py --skip-cpu-governor-check) | ||||
|     echo $runtime | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_lstm "$@" | ||||
| fi | ||||
| @ -1,43 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_gpu_speed_mlstm () { | ||||
|   echo "Testing: MLSTM, GPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0 | ||||
|  | ||||
|   cd scripts/ | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python mlstm.py --skip-cpu-governor-check) | ||||
|     echo $runtime | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_mlstm "$@" | ||||
| fi | ||||
| @ -1,44 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_gpu_speed_mnist () { | ||||
|   echo "Testing: MNIST, GPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/examples.git -b perftests | ||||
|  | ||||
|   cd examples/mnist | ||||
|  | ||||
|   pip install -r requirements.txt | ||||
|  | ||||
|   # Download data | ||||
|   python main.py --epochs 0 | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log) | ||||
|     echo $runtime | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_mnist "$@" | ||||
| fi | ||||
| @ -1,52 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| test_gpu_speed_word_language_model () { | ||||
|   echo "Testing: word language model on Wikitext-2, GPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/examples.git -b perftests | ||||
|  | ||||
|   cd examples/word_language_model | ||||
|  | ||||
|   cd data/wikitext-2 | ||||
|  | ||||
|   # Reduce dataset size, so that we can have more runs per test | ||||
|   sed -n '1,200p' test.txt > test_tmp.txt | ||||
|   sed -n '1,1000p' train.txt > train_tmp.txt | ||||
|   sed -n '1,200p' valid.txt > valid_tmp.txt | ||||
|  | ||||
|   mv test_tmp.txt test.txt | ||||
|   mv train_tmp.txt train.txt | ||||
|   mv valid_tmp.txt valid.txt | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=$NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py --cuda --epochs 1) | ||||
|     echo $runtime | ||||
|     SAMPLE_ARRAY+=(${runtime}) | ||||
|   done | ||||
|  | ||||
|   cd ../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py ${SAMPLE_ARRAY[@]}) | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo $stats | ||||
|  | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name ${FUNCNAME[0]} --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_word_language_model "$@" | ||||
| fi | ||||
| @ -1,13 +0,0 @@ | ||||
| import sys | ||||
| import json | ||||
|  | ||||
| data_file_path = sys.argv[1] | ||||
| commit_hash = sys.argv[2] | ||||
|  | ||||
| with open(data_file_path) as data_file: | ||||
|     data = json.load(data_file) | ||||
|  | ||||
| data['commit'] = commit_hash | ||||
|  | ||||
| with open(data_file_path, 'w') as data_file: | ||||
|     json.dump(data, data_file) | ||||
| @ -1,11 +0,0 @@ | ||||
| import sys | ||||
|  | ||||
| log_file_path = sys.argv[1] | ||||
|  | ||||
| with open(log_file_path) as f: | ||||
|     lines = f.readlines() | ||||
|  | ||||
| for line in lines: | ||||
|     # Ignore errors from CPU instruction set testing. Lines already end | ||||
|     # with a newline, so write them as-is instead of print()-ing a second one. | ||||
|     if 'src.c' not in line: | ||||
|         sys.stdout.write(line) | ||||
| @ -1,64 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME="short-perf-test-cpu" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| cd .jenkins/pytorch/perf_test | ||||
|  | ||||
| echo "Running CPU perf test for PyTorch..." | ||||
|  | ||||
| pip install awscli | ||||
|  | ||||
| # Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read | ||||
| # More info at https://github.com/aws/aws-cli/issues/2321 | ||||
| aws configure set default.s3.multipart_threshold 5GB | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     # Get current master commit hash | ||||
|     export MASTER_COMMIT_ID=$(git log --format="%H" -n 1) | ||||
| fi | ||||
|  | ||||
| # Find the master commit to test against | ||||
| git remote add upstream https://github.com/pytorch/pytorch.git | ||||
| git fetch upstream | ||||
| IFS=$'\n' | ||||
| master_commit_ids=($(git rev-list upstream/master)) | ||||
| for commit_id in "${master_commit_ids[@]}"; do | ||||
|     if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/${commit_id}.json; then | ||||
|         LATEST_TESTED_COMMIT=${commit_id} | ||||
|         break | ||||
|     fi | ||||
| done | ||||
| aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/${LATEST_TESTED_COMMIT}.json cpu_runtime.json | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     # Prepare new baseline file | ||||
|     cp cpu_runtime.json new_cpu_runtime.json | ||||
|     python update_commit_hash.py new_cpu_runtime.json ${MASTER_COMMIT_ID} | ||||
| fi | ||||
|  | ||||
| # Include tests | ||||
| . ./test_cpu_speed_mini_sequence_labeler.sh | ||||
| . ./test_cpu_speed_mnist.sh | ||||
| . ./test_cpu_speed_torch.sh | ||||
| . ./test_cpu_speed_torch_tensor.sh | ||||
|  | ||||
| # Run tests | ||||
| export TEST_MODE="compare_with_baseline" | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     export TEST_MODE="compare_and_update" | ||||
| fi | ||||
|  | ||||
| # Operator tests | ||||
| run_test test_cpu_speed_torch ${TEST_MODE} | ||||
| run_test test_cpu_speed_torch_tensor ${TEST_MODE} | ||||
|  | ||||
| # Sample model tests | ||||
| run_test test_cpu_speed_mini_sequence_labeler 20 ${TEST_MODE} | ||||
| run_test test_cpu_speed_mnist 20 ${TEST_MODE} | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     # This could cause a race condition if two jobs test the same master commit, | ||||
|     # but the chance of them executing this line at the same time is low. | ||||
|     aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/${MASTER_COMMIT_ID}.json --acl public-read | ||||
| fi | ||||
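The baseline-discovery loop above walks `git rev-list upstream/master` from newest to oldest and takes the first commit that already has a runtime file in S3. The same idea in boto3 form, as a hedged sketch (the function name and client usage are illustrative, not the CI's actual code):

import boto3

def find_latest_tested_commit(commit_ids, backend='cpu'):
    # Return the newest master commit whose baseline exists in the bucket.
    s3 = boto3.client('s3')
    for commit_id in commit_ids:
        key = 'pytorch/{}_runtime/{}.json'.format(backend, commit_id)
        resp = s3.list_objects_v2(Bucket='ossci-perf-test', Prefix=key)
        if resp.get('KeyCount', 0) > 0:
            return commit_id
    return None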
| @ -1,68 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME="short-perf-test-gpu" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| pushd .jenkins/pytorch/perf_test | ||||
|  | ||||
| echo "Running GPU perf test for PyTorch..." | ||||
|  | ||||
| pip install awscli | ||||
|  | ||||
| # Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read | ||||
| # More info at https://github.com/aws/aws-cli/issues/2321 | ||||
| aws configure set default.s3.multipart_threshold 5GB | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     # Get current master commit hash | ||||
|     export MASTER_COMMIT_ID=$(git log --format="%H" -n 1) | ||||
| fi | ||||
|  | ||||
| # Find the master commit to test against | ||||
| git remote add upstream https://github.com/pytorch/pytorch.git | ||||
| git fetch upstream | ||||
| IFS=$'\n' | ||||
| master_commit_ids=($(git rev-list upstream/master)) | ||||
| for commit_id in "${master_commit_ids[@]}"; do | ||||
|     if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/${commit_id}.json; then | ||||
|         LATEST_TESTED_COMMIT=${commit_id} | ||||
|         break | ||||
|     fi | ||||
| done | ||||
| aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/${LATEST_TESTED_COMMIT}.json gpu_runtime.json | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     # Prepare new baseline file | ||||
|     cp gpu_runtime.json new_gpu_runtime.json | ||||
|     python update_commit_hash.py new_gpu_runtime.json ${MASTER_COMMIT_ID} | ||||
| fi | ||||
|  | ||||
| # Include tests | ||||
| . ./test_gpu_speed_mnist.sh | ||||
| . ./test_gpu_speed_word_language_model.sh | ||||
| . ./test_gpu_speed_cudnn_lstm.sh | ||||
| . ./test_gpu_speed_lstm.sh | ||||
| . ./test_gpu_speed_mlstm.sh | ||||
|  | ||||
| # Run tests | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     run_test test_gpu_speed_mnist 20 compare_and_update | ||||
|     run_test test_gpu_speed_word_language_model 20 compare_and_update | ||||
|     run_test test_gpu_speed_cudnn_lstm 20 compare_and_update | ||||
|     run_test test_gpu_speed_lstm 20 compare_and_update | ||||
|     run_test test_gpu_speed_mlstm 20 compare_and_update | ||||
| else | ||||
|     run_test test_gpu_speed_mnist 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_word_language_model 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_cudnn_lstm 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_lstm 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_mlstm 20 compare_with_baseline | ||||
| fi | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == master ]]; then | ||||
|     # This could cause a race condition if two jobs test the same master commit, | ||||
|     # but the chance of them executing this line at the same time is low. | ||||
|     aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/${MASTER_COMMIT_ID}.json --acl public-read | ||||
| fi | ||||
|  | ||||
| popd | ||||
| @ -1,177 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME="${BUILD_ENVIRONMENT}-test" | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| # Required environment variable: $BUILD_ENVIRONMENT | ||||
| # (This is set by default in the Docker images we build, so you don't | ||||
| # need to set it yourself.) | ||||
|  | ||||
| echo "Testing pytorch" | ||||
|  | ||||
| if [ -n "${IN_CIRCLECI}" ]; then | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-* ]]; then | ||||
|     # TODO: move this to Docker | ||||
|     sudo apt-get update | ||||
|     sudo apt-get install -y --allow-downgrades --allow-change-held-packages libnccl-dev=2.2.13-1+cuda9.0 libnccl2=2.2.13-1+cuda9.0 | ||||
|   fi | ||||
|  | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda8-* ]] || [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]]; then | ||||
|     # TODO: move this to Docker | ||||
|     sudo apt-get update | ||||
|     sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev | ||||
|     sudo apt-get install -y --no-install-recommends openssh-client openssh-server | ||||
|     sudo mkdir -p /var/run/sshd | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| # JIT C++ extensions require ninja. | ||||
| git clone https://github.com/ninja-build/ninja --quiet | ||||
| pushd ninja | ||||
| python ./configure.py --bootstrap | ||||
| export PATH="$PWD:$PATH" | ||||
| popd | ||||
|  | ||||
| # DANGER WILL ROBINSON.  The LD_PRELOAD here could cause you problems | ||||
| # if you're not careful.  Check this if you made some changes and the | ||||
| # ASAN test is not working | ||||
| if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then | ||||
|     export ASAN_OPTIONS=detect_leaks=0:symbolize=1:strict_init_order=true | ||||
|     # We suppress the vptr violation, since we have separate copies of | ||||
|     # libprotobuf in both libtorch.so and libcaffe2.so, and it causes | ||||
|     # the following problem: | ||||
|     #    test_cse (__main__.TestJit) ... torch/csrc/jit/export.cpp:622:38: | ||||
|     #        runtime error: member call on address ... which does not point | ||||
|     #        to an object of type 'google::protobuf::MessageLite' | ||||
|     #        ...: note: object is of type 'onnx_torch::ModelProto' | ||||
|     # | ||||
|     # This problem should be solved when libtorch.so and libcaffe2.so are | ||||
|     # merged. | ||||
|     export UBSAN_OPTIONS=print_stacktrace=1:suppressions=$PWD/ubsan.supp | ||||
|     export PYTORCH_TEST_WITH_ASAN=1 | ||||
|     export PYTORCH_TEST_WITH_UBSAN=1 | ||||
|     # TODO: Figure out how to avoid hard-coding these paths | ||||
|     export ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-5.0/bin/llvm-symbolizer | ||||
|     export LD_PRELOAD=/usr/lib/llvm-5.0/lib/clang/5.0.0/lib/linux/libclang_rt.asan-x86_64.so | ||||
|     # Increase stack size, because ASAN red zones use more stack | ||||
|     ulimit -s 81920 | ||||
|  | ||||
|     function get_exit_code() { | ||||
|       set +e | ||||
|       "$@" | ||||
|       retcode=$? | ||||
|       set -e | ||||
|       return $retcode | ||||
|     } | ||||
|     (cd test && python -c "import torch") | ||||
|     echo "The next three invocations are expected to crash; if they don't that means ASAN/UBSAN is misconfigured" | ||||
|     (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_csrc_asan(3)") | ||||
|     (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_csrc_ubsan(0)") | ||||
|     (cd test && ! get_exit_code python -c "import torch; torch._C._crash_if_aten_asan(3)") | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then | ||||
|   export PYTORCH_TEST_WITH_ROCM=1 | ||||
| fi | ||||
|  | ||||
| if [[ "${JOB_BASE_NAME}" == *-NO_AVX-* ]]; then | ||||
|   export ATEN_CPU_CAPABILITY=default | ||||
| elif [[ "${JOB_BASE_NAME}" == *-NO_AVX2-* ]]; then | ||||
|   export ATEN_CPU_CAPABILITY=avx | ||||
| fi | ||||
|  | ||||
| test_python_nn() { | ||||
|   time python test/run_test.py --include nn --verbose | ||||
| } | ||||
|  | ||||
| test_python_all_except_nn() { | ||||
|   time python test/run_test.py --exclude nn --verbose | ||||
| } | ||||
|  | ||||
| test_aten() { | ||||
|   # Test ATen | ||||
|   # The following test(s) of ATen have already been skipped by Caffe2 in the ROCm environment: | ||||
|   # scalar_tensor_test, basic, native_test | ||||
|   if ([[ "$BUILD_ENVIRONMENT" != *asan* ]] && [[ "$BUILD_ENVIRONMENT" != *rocm* ]]); then | ||||
|     echo "Running ATen tests with pytorch lib" | ||||
|     TORCH_LIB_PATH=$(python -c "import site; print(site.getsitepackages()[0])")/torch/lib | ||||
|     # NB: the ATen test binaries don't have RPATH set, so it's necessary to | ||||
|     # put the dynamic libraries somewhere where the dynamic linker can find them. | ||||
|     # This is a bit of a hack. | ||||
|     if [[ "$BUILD_ENVIRONMENT" == *ppc64le* ]]; then | ||||
|       SUDO=sudo | ||||
|     fi | ||||
|  | ||||
|     ${SUDO} ln -s "$TORCH_LIB_PATH"/libcaffe2* build/bin | ||||
|     ${SUDO} ln -s "$TORCH_LIB_PATH"/libnccl* build/bin | ||||
|  | ||||
|     ls build/bin | ||||
|     aten/tools/run_tests.sh build/bin | ||||
|   fi | ||||
| } | ||||
|  | ||||
| test_torchvision() { | ||||
|   rm -rf ninja | ||||
|  | ||||
|   echo "Installing torchvision at branch master" | ||||
|   rm -rf vision | ||||
|   # TODO: This git clone is bad; it means pushes to torchvision can break | ||||
|   # PyTorch CI | ||||
|   git clone https://github.com/pytorch/vision --quiet | ||||
|   pushd vision | ||||
|   # python setup.py install with a tqdm dependency is broken in the | ||||
|   # Travis Python nightly (but not in latest Python nightlies, so | ||||
|   # this should be a transient requirement...) | ||||
|   # See https://github.com/pytorch/pytorch/issues/7525 | ||||
|   #time python setup.py install | ||||
|   pip install --user . | ||||
|   popd | ||||
| } | ||||
|  | ||||
| test_libtorch() { | ||||
|   if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then | ||||
|      echo "Testing libtorch" | ||||
|      CPP_BUILD="$PWD/../cpp-build" | ||||
|      if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
|        "$CPP_BUILD"/caffe2/bin/test_jit | ||||
|      else | ||||
|        "$CPP_BUILD"/caffe2/bin/test_jit "[cpu]" | ||||
|      fi | ||||
|      python tools/download_mnist.py --quiet -d test/cpp/api/mnist | ||||
|      OMP_NUM_THREADS=2 "$CPP_BUILD"/caffe2/bin/test_api | ||||
|   fi | ||||
| } | ||||
|  | ||||
| test_custom_script_ops() { | ||||
|   if [[ "$BUILD_TEST_LIBTORCH" == "1" ]]; then | ||||
|     echo "Testing custom script operators" | ||||
|     CUSTOM_OP_BUILD="$PWD/../custom-op-build" | ||||
|     pushd test/custom_operator | ||||
|     cp -r "$CUSTOM_OP_BUILD" build | ||||
|     # Run tests Python-side and export a script module. | ||||
|     python test_custom_ops.py -v | ||||
|     python model.py --export-script-module=model.pt | ||||
|     # Run tests C++-side and load the exported script module. | ||||
|     build/test_custom_ops ./model.pt | ||||
|     popd | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then | ||||
|   test_python_nn | ||||
|   test_python_all_except_nn | ||||
|   test_aten | ||||
|   test_torchvision | ||||
|   test_libtorch | ||||
|   test_custom_script_ops | ||||
| else | ||||
|   if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then | ||||
|     test_python_nn | ||||
|   elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then | ||||
|     test_python_all_except_nn | ||||
|     test_aten | ||||
|     test_torchvision | ||||
|     test_libtorch | ||||
|     test_custom_script_ops | ||||
|   fi | ||||
| fi | ||||
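One subtlety in the ASAN section above: the three `_crash_if_*` invocations invert the usual success condition, since a clean exit would mean the sanitizers are not actually loaded. A small Python restatement of that check (a sketch, not the CI's code):

import subprocess

def expect_crash(py_snippet):
    # A nonzero exit code is the *success* condition here: it shows the
    # sanitizer aborted the intentionally bad call.
    result = subprocess.run(['python', '-c', py_snippet], cwd='test')
    assert result.returncode != 0, 'expected ASAN/UBSAN to abort this call'

expect_crash('import torch; torch._C._crash_if_csrc_asan(3)')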
| @ -1,155 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # If you want to rebuild, run this with REBUILD=1 | ||||
| # If you want to build with CUDA, run this with USE_CUDA=1 | ||||
| # If you want to build without CUDA, run this with USE_CUDA=0 | ||||
|  | ||||
| if [ ! -f setup.py ]; then | ||||
|   echo "ERROR: Please run this build script from PyTorch root directory." | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| COMPACT_JOB_NAME=pytorch-win-ws2016-cuda9-cudnn7-py3-build | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID} | ||||
| if [[ ${JOB_NAME} == *"develop"* ]]; then | ||||
|   export IMAGE_COMMIT_TAG=develop-${IMAGE_COMMIT_TAG} | ||||
| fi | ||||
|  | ||||
| mkdir -p ci_scripts/ | ||||
|  | ||||
| cat >ci_scripts/upload_image.py << EOL | ||||
|  | ||||
| import os | ||||
| import sys | ||||
| import boto3 | ||||
|  | ||||
| IMAGE_COMMIT_TAG = os.getenv('IMAGE_COMMIT_TAG') | ||||
|  | ||||
| session = boto3.session.Session() | ||||
| s3 = session.resource('s3') | ||||
| with open(sys.argv[1], 'rb') as data: | ||||
|     s3.Bucket('ossci-windows-build').put_object(Key='pytorch/'+IMAGE_COMMIT_TAG+'.7z', Body=data) | ||||
| object_acl = s3.ObjectAcl('ossci-windows-build','pytorch/'+IMAGE_COMMIT_TAG+'.7z') | ||||
| response = object_acl.put(ACL='public-read') | ||||
|  | ||||
| EOL | ||||
|  | ||||
| cat >ci_scripts/build_pytorch.bat <<EOL | ||||
|  | ||||
| set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\ProgramData\\chocolatey\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH% | ||||
|  | ||||
| :: Install MKL | ||||
| if "%REBUILD%"=="" ( | ||||
|   if "%BUILD_ENVIRONMENT%"=="" ( | ||||
|     curl -k https://s3.amazonaws.com/ossci-windows/mkl_2018.2.185.7z --output mkl.7z | ||||
|   ) else ( | ||||
|     aws s3 cp s3://ossci-windows/mkl_2018.2.185.7z mkl.7z --quiet | ||||
|   ) | ||||
|   7z x -aoa mkl.7z -omkl | ||||
| ) | ||||
| set CMAKE_INCLUDE_PATH=%cd%\\mkl\\include | ||||
| set LIB=%cd%\\mkl\\lib;%LIB% | ||||
|  | ||||
| :: Install MAGMA | ||||
| if "%REBUILD%"=="" ( | ||||
|   if "%BUILD_ENVIRONMENT%"=="" ( | ||||
|     curl -k https://s3.amazonaws.com/ossci-windows/magma_cuda90_release_mkl_2018.2.185.7z --output magma_cuda90_release_mkl_2018.2.185.7z | ||||
|   ) else ( | ||||
|     aws s3 cp s3://ossci-windows/magma_cuda90_release_mkl_2018.2.185.7z magma_cuda90_release_mkl_2018.2.185.7z --quiet | ||||
|   ) | ||||
|   7z x -aoa magma_cuda90_release_mkl_2018.2.185.7z -omagma | ||||
| ) | ||||
| set MAGMA_HOME=%cd%\\magma | ||||
|  | ||||
| :: Install sccache | ||||
| mkdir %CD%\\tmp_bin | ||||
| if "%REBUILD%"=="" ( | ||||
|   :check_sccache | ||||
|   %CD%\\tmp_bin\\sccache.exe --show-stats || ( | ||||
|     taskkill /im sccache.exe /f /t || ver > nul | ||||
|     del %CD%\\tmp_bin\\sccache.exe | ||||
|     if "%BUILD_ENVIRONMENT%"=="" ( | ||||
|       curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\\tmp_bin\\sccache.exe | ||||
|     ) else ( | ||||
|       aws s3 cp s3://ossci-windows/sccache.exe %CD%\\tmp_bin\\sccache.exe | ||||
|     ) | ||||
|     goto :check_sccache | ||||
|   ) | ||||
| ) | ||||
|  | ||||
| :: Install Miniconda3 | ||||
| if "%REBUILD%"=="" ( | ||||
|   IF EXIST C:\\Jenkins\\Miniconda3 ( rd /s /q C:\\Jenkins\\Miniconda3 ) | ||||
|   curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O | ||||
|   .\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=C:\\Jenkins\\Miniconda3 | ||||
| ) | ||||
| call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3 | ||||
| if "%REBUILD%"=="" ( call conda install -y -q numpy cffi pyyaml boto3 ) | ||||
|  | ||||
| :: Install ninja | ||||
| if "%REBUILD%"=="" ( pip install ninja ) | ||||
|  | ||||
| call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvarsall.bat" x86_amd64 | ||||
|  | ||||
| git submodule update --init --recursive | ||||
|  | ||||
| set PATH=%CD%\\tmp_bin;C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\bin;C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\libnvvp;%PATH% | ||||
| set CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set CUDA_PATH_V9_0=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set NVTOOLSEXT_PATH=C:\\Program Files\\NVIDIA Corporation\\NvToolsExt | ||||
| set CUDNN_LIB_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\lib\\x64 | ||||
| set CUDA_TOOLKIT_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set CUDNN_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
|  | ||||
| :: Target only our CI GPU machine's CUDA arch to speed up the build | ||||
| set TORCH_CUDA_ARCH_LIST=5.2 | ||||
|  | ||||
| sccache --stop-server | ||||
| sccache --start-server | ||||
| sccache --zero-stats | ||||
| set CC=sccache cl | ||||
| set CXX=sccache cl | ||||
|  | ||||
| set DISTUTILS_USE_SDK=1 | ||||
|  | ||||
| set CMAKE_GENERATOR=Ninja | ||||
|  | ||||
| if not "%USE_CUDA%"=="1" ( | ||||
|   if "%REBUILD%"=="" ( | ||||
|     set NO_CUDA=1 | ||||
|     python setup.py install | ||||
|   ) | ||||
|   if errorlevel 1 exit /b 1 | ||||
|   if not errorlevel 0 exit /b 1 | ||||
| ) | ||||
|  | ||||
| if not "%USE_CUDA%"=="0" ( | ||||
|   if "%REBUILD%"=="" ( | ||||
|     sccache --show-stats | ||||
|     sccache --zero-stats | ||||
|     rd /s /q C:\\Jenkins\\Miniconda3\\Lib\\site-packages\\torch | ||||
|     copy %CD%\\tmp_bin\\sccache.exe tmp_bin\\nvcc.exe | ||||
|   ) | ||||
|  | ||||
|   set CUDA_NVCC_EXECUTABLE=%CD%\\tmp_bin\\nvcc | ||||
|  | ||||
|   if "%REBUILD%"=="" set NO_CUDA=0 | ||||
|  | ||||
|   python setup.py install && sccache --show-stats && ( | ||||
|     if "%BUILD_ENVIRONMENT%"=="" ( | ||||
|       echo "NOTE: To run \`import torch\`, please make sure to activate the conda environment by running \`call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3\` in Command Prompt before running Git Bash." | ||||
|     ) else ( | ||||
|       7z a %IMAGE_COMMIT_TAG%.7z C:\\Jenkins\\Miniconda3\\Lib\\site-packages\\torch && python ci_scripts\\upload_image.py %IMAGE_COMMIT_TAG%.7z | ||||
|     ) | ||||
|   ) | ||||
| ) | ||||
|  | ||||
| EOL | ||||
|  | ||||
| ci_scripts/build_pytorch.bat | ||||
| if [ ! -f "$IMAGE_COMMIT_TAG".7z ] && [ -n "${BUILD_ENVIRONMENT}" ]; then | ||||
|     exit 1 | ||||
| fi | ||||
| echo "BUILD PASSED" | ||||
| @ -1,93 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| COMPACT_JOB_NAME=pytorch-win-ws2016-cuda9-cudnn7-py3-test | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| export IMAGE_COMMIT_TAG=${BUILD_ENVIRONMENT}-${IMAGE_COMMIT_ID} | ||||
| if [[ ${JOB_NAME} == *"develop"* ]]; then | ||||
|   export IMAGE_COMMIT_TAG=develop-${IMAGE_COMMIT_TAG} | ||||
| fi | ||||
|  | ||||
| mkdir -p ci_scripts/ | ||||
|  | ||||
| cat >ci_scripts/download_image.py << EOL | ||||
|  | ||||
| import os | ||||
| import sys | ||||
| import boto3 | ||||
| import botocore | ||||
|  | ||||
| IMAGE_COMMIT_TAG = os.getenv('IMAGE_COMMIT_TAG') | ||||
|  | ||||
| session = boto3.session.Session() | ||||
| s3 = session.resource('s3') | ||||
| BUCKET_NAME = 'ossci-windows-build' | ||||
| KEY = 'pytorch/'+IMAGE_COMMIT_TAG+'.7z' | ||||
| LOCAL_FILE_PATH = sys.argv[1] | ||||
| try: | ||||
|     s3.Bucket(BUCKET_NAME).download_file(KEY, LOCAL_FILE_PATH) | ||||
| except botocore.exceptions.ClientError as e: | ||||
|     if e.response['Error']['Code'] == "404": | ||||
|         print("The object does not exist.") | ||||
|     else: | ||||
|         raise | ||||
|  | ||||
| EOL | ||||
|  | ||||
| cat >ci_scripts/setup_pytorch_env.bat <<EOL | ||||
|  | ||||
| set PATH=C:\\Program Files\\CMake\\bin;C:\\Program Files\\7-Zip;C:\\ProgramData\\chocolatey\\bin;C:\\Program Files\\Git\\cmd;C:\\Program Files\\Amazon\\AWSCLI;%PATH% | ||||
|  | ||||
| :: Install Miniconda3 | ||||
| IF EXIST C:\\Jenkins\\Miniconda3 ( rd /s /q C:\\Jenkins\\Miniconda3 ) | ||||
| curl https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O | ||||
| .\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=C:\\Jenkins\\Miniconda3 | ||||
| call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3 | ||||
| call conda install -y -q numpy mkl cffi pyyaml boto3 | ||||
|  | ||||
| pip install ninja | ||||
|  | ||||
| call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvarsall.bat" x86_amd64 | ||||
|  | ||||
| set PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\bin;C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\libnvvp;%PATH% | ||||
| set CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set CUDA_PATH_V9_0=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set NVTOOLSEXT_PATH=C:\\Program Files\\NVIDIA Corporation\\NvToolsExt | ||||
| set CUDNN_LIB_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\lib\\x64 | ||||
| set CUDA_TOOLKIT_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set CUDNN_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0 | ||||
| set PYTHONPATH=%CD%\\test;%PYTHONPATH% | ||||
|  | ||||
| cd test/ | ||||
|  | ||||
| python ..\\ci_scripts\\download_image.py %IMAGE_COMMIT_TAG%.7z | ||||
|  | ||||
| 7z x %IMAGE_COMMIT_TAG%.7z | ||||
|  | ||||
| cd .. | ||||
|  | ||||
| EOL | ||||
|  | ||||
| cat >ci_scripts/test_python_nn.bat <<EOL | ||||
| call ci_scripts/setup_pytorch_env.bat | ||||
| cd test/ && python run_test.py --include nn --verbose && cd .. | ||||
| EOL | ||||
|  | ||||
| cat >ci_scripts/test_python_all_except_nn.bat <<EOL | ||||
| call ci_scripts/setup_pytorch_env.bat | ||||
| cd test/ && python run_test.py --exclude nn --verbose && cd .. | ||||
| EOL | ||||
|  | ||||
| run_tests() { | ||||
|     if [ -z "${JOB_BASE_NAME}" ] || [[ "${JOB_BASE_NAME}" == *-test ]]; then | ||||
|         ci_scripts/test_python_nn.bat && ci_scripts/test_python_all_except_nn.bat | ||||
|     else | ||||
|         if [[ "${JOB_BASE_NAME}" == *-test1 ]]; then | ||||
|             ci_scripts/test_python_nn.bat | ||||
|         elif [[ "${JOB_BASE_NAME}" == *-test2 ]]; then | ||||
|             ci_scripts/test_python_all_except_nn.bat | ||||
|         fi | ||||
|     fi | ||||
| } | ||||
|  | ||||
| run_tests && echo "TEST PASSED" | ||||
| @ -1,31 +0,0 @@ | ||||
| # https://travis-ci.org/zdevito/ATen | ||||
| language: python | ||||
| python: | ||||
|     - 2.7 | ||||
|     - 3.6 | ||||
|  | ||||
| dist: trusty | ||||
|  | ||||
| before_install: | ||||
|     - sudo apt-get install -qq valgrind | ||||
|  | ||||
| install: | ||||
|     - travis_retry pip install pyyaml typing | ||||
|  | ||||
| script: | ||||
|     - cd aten | ||||
|     - mkdir build install | ||||
|     - cd build | ||||
|     - cmake .. -DUSE_CUDA=OFF -DCMAKE_INSTALL_PREFIX=../install | ||||
|     - make install | ||||
|     - ../tools/run_tests.sh . | ||||
|     - cd .. | ||||
|     - tools/test_install.sh $(pwd)/install $(pwd) | ||||
|  | ||||
| matrix: | ||||
|     fast_finish: true | ||||
|     include: | ||||
|         env: LINT_CHECK | ||||
|         python: "2.7" | ||||
|         install: pip install flake8 | ||||
|         script: flake8 | ||||
.travis.yml
@ -3,6 +3,38 @@ language: python | ||||
| dist: trusty | ||||
| git: | ||||
|   submodules: false | ||||
| python: | ||||
|     - 2.7.9 | ||||
|     - 2.7 | ||||
|     - 3.5 | ||||
|     - 3.6 | ||||
|     - nightly | ||||
|  | ||||
| cache: | ||||
|     - ccache | ||||
|     - directories: | ||||
|         - $HOME/.ccache | ||||
|  | ||||
| install: | ||||
|     - unset CCACHE_DISABLE | ||||
|     - export CCACHE_DIR=$HOME/.ccache | ||||
|     - export CC="ccache gcc-5" | ||||
|     - export CXX="ccache g++-5" | ||||
|     - ccache --show-stats | ||||
|     - travis_retry pip install --upgrade pip setuptools wheel | ||||
|     - travis_retry pip install -r requirements.txt --only-binary=scipy | ||||
|     - git submodule update --init --recursive | ||||
|     - MAX_JOBS=8 python setup.py install | ||||
|  | ||||
| addons: | ||||
|     apt: | ||||
|         sources: | ||||
|             - ubuntu-toolchain-r-test | ||||
|         packages: | ||||
|             - g++-5 | ||||
|  | ||||
| script: | ||||
|     - OMP_NUM_THREADS=2 ./test/run_test.sh | ||||
|  | ||||
| # This reportedly works around an issue downloading packages from pypi on | ||||
| # travis.  Consider removing this after the underlying issue is fixed. | ||||
@ -12,20 +44,8 @@ sudo: false | ||||
| matrix: | ||||
|     fast_finish: true | ||||
|     include: | ||||
|       - env: LINT_CHECK | ||||
|         python: "2.7" | ||||
|         addons: true | ||||
|         install: pip install flake8 | ||||
|         script: flake8 | ||||
|       - env: LINT_CHECK | ||||
|         python: "3.7" | ||||
|         dist: xenial    # required for Python 3.7 (travis-ci/travis-ci#9069) | ||||
|         sudo: required  # required for Python 3.7 (travis-ci/travis-ci#9069) | ||||
|         install: pip install flake8 | ||||
|         script: flake8 | ||||
|       - env: MYPY_TYPE_CHECK | ||||
|         python: "3.6" | ||||
|         install: pip install mypy mypy-extensions | ||||
|         script: mypy @mypy-files.txt | ||||
|       - env: CPP_DOC_CHECK | ||||
|         install: sudo apt-get install -y doxygen | ||||
|         script: cd docs/cpp && ./check-doxygen.sh | ||||
|  | ||||
CITATION
@ -1,6 +0,0 @@ | ||||
| @inproceedings{paszke2017automatic, | ||||
|   title={Automatic differentiation in PyTorch}, | ||||
|   author={Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam}, | ||||
|   booktitle={NIPS-W}, | ||||
|   year={2017} | ||||
| } | ||||
CMakeLists.txt
@ -1,421 +0,0 @@ | ||||
| cmake_minimum_required(VERSION 3.5 FATAL_ERROR) | ||||
| #cmake_policy(SET CMP0022 NEW) | ||||
| #cmake_policy(SET CMP0023 NEW) | ||||
|  | ||||
| # ---[ Project and semantic versioning. | ||||
| project(Caffe2 CXX C) | ||||
|  | ||||
| set(CAFFE2_VERSION_MAJOR 0) | ||||
| set(CAFFE2_VERSION_MINOR 8) | ||||
| set(CAFFE2_VERSION_PATCH 2) | ||||
| set(CAFFE2_VERSION | ||||
|     "${CAFFE2_VERSION_MAJOR}.${CAFFE2_VERSION_MINOR}.${CAFFE2_VERSION_PATCH}") | ||||
|  | ||||
| # One variable that determines whether the current cmake process is being run | ||||
| # with the main Caffe2 library. This is useful for building modules - if | ||||
| # modules are built with the main Caffe2 library then one does not need to do | ||||
| # find caffe2 in the cmake script. One can usually guard it in some way like | ||||
| #    if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) | ||||
| #      find_package(Caffe2 REQUIRED) | ||||
| #    endif() | ||||
| set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON) | ||||
|  | ||||
| if(NOT DEFINED BLAS_SET_BY_USER) | ||||
|   if(DEFINED BLAS) | ||||
|     set(BLAS_SET_BY_USER TRUE) | ||||
|   else() | ||||
|     message(STATUS "Not forcing any particular BLAS to be found") | ||||
|     set(BLAS_SET_BY_USER FALSE) | ||||
|   endif() | ||||
|   set(BLAS_SET_BY_USER ${BLAS_SET_BY_USER} CACHE STRING "Marks whether BLAS was manually set by user or auto-detected") | ||||
| endif() | ||||
|  | ||||
| # Apple specific | ||||
| if(APPLE) | ||||
|   # These lines are an attempt to make find_package(cuda) pick up | ||||
|   # libcuda.dylib, and not cuda.framework.  It doesn't work all | ||||
|   # the time, but it seems to help for some users. | ||||
|   # TODO: replace this with a more robust fix | ||||
|   set(CMAKE_FIND_FRAMEWORK LAST) | ||||
|   set(CMAKE_FIND_APPBUNDLE LAST) | ||||
|  | ||||
|   # Get clang version on macOS | ||||
|   EXECUTE_PROCESS( COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE clang_full_version_string ) | ||||
|   string(REGEX REPLACE "Apple LLVM version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string}) | ||||
|   MESSAGE( STATUS "CLANG_VERSION_STRING:         " ${CLANG_VERSION_STRING} ) | ||||
|  | ||||
|  | ||||
|   # RPATH stuff | ||||
|   set(CMAKE_MACOSX_RPATH ON) | ||||
| endif() | ||||
|  | ||||
| # ---[ Options. | ||||
| # Note to developers: if you add an option below, make sure you also add it to | ||||
| # cmake/Summary.cmake so that the summary prints out the option values. | ||||
| include(CMakeDependentOption) | ||||
| option(BUILD_TORCH "Build Torch" OFF) | ||||
| option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) | ||||
| option(BUILD_ATEN_MOBILE "Build ATen for Android and iOS" OFF) | ||||
| option(BUILD_BINARY "Build C++ binaries" OFF) | ||||
| option(BUILD_DOCS "Build Caffe2 documentation" OFF) | ||||
| option(BUILD_CUSTOM_PROTOBUF "Build and use Caffe2's own protobuf under third_party" ON) | ||||
| option(BUILD_PYTHON "Build Python binaries" ON) | ||||
| option(BUILD_CAFFE2_OPS "Build Caffe2 operators" ON) | ||||
| option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON) | ||||
| cmake_dependent_option( | ||||
|     CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON | ||||
|     "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF) | ||||
| cmake_dependent_option( | ||||
|     CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON | ||||
|     "NOT BUILD_SHARED_LIBS" OFF) | ||||
| option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF) | ||||
| cmake_dependent_option( | ||||
|     INSTALL_TEST "Install test binaries if BUILD_TEST is on" OFF | ||||
|     "BUILD_TEST" OFF) | ||||
| option(USE_ACL "Use ARM Compute Library" OFF) | ||||
| option(USE_ASAN "Use Address Sanitizer" OFF) | ||||
| option(USE_CUDA "Use CUDA" ON) | ||||
| option(USE_ROCM "Use ROCm" OFF) | ||||
| option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF) | ||||
| cmake_dependent_option( | ||||
|     USE_CUDNN "Use cuDNN" ON | ||||
|     "USE_CUDA" OFF) | ||||
| option(USE_FFMPEG "Use ffmpeg" OFF) | ||||
| option(USE_GFLAGS "Use GFLAGS" ON) | ||||
| option(USE_GLOG "Use GLOG" ON) | ||||
| option(USE_LEVELDB "Use LEVELDB" ON) | ||||
| option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF) | ||||
| option(USE_LMDB "Use LMDB" ON) | ||||
| option(USE_METAL "Use Metal for iOS build" ON) | ||||
| option(USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON) | ||||
| option(USE_NATIVE_ARCH "Use -march=native" OFF) | ||||
| option(USE_NCCL "Use NCCL" ON) | ||||
| option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF) | ||||
| option(USE_NERVANA_GPU "Use Nervana GPU backend" OFF) | ||||
| option(USE_NNAPI "Use NNAPI" OFF) | ||||
| option(USE_NNPACK "Use NNPACK" ON) | ||||
| option(USE_NUMA "Use NUMA (only available on Linux)" ON) | ||||
| cmake_dependent_option( | ||||
|     USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on." OFF | ||||
|     "USE_CUDA" OFF) | ||||
| option(USE_OBSERVERS "Use observers module." OFF) | ||||
| option(USE_OPENCL "Use OpenCL" OFF) | ||||
| option(USE_OPENCV "Use OpenCV" ON) | ||||
| option(USE_OPENMP "Use OpenMP for parallel code" OFF) | ||||
| option(USE_PROF "Use profiling" OFF) | ||||
| option(USE_REDIS "Use Redis" OFF) | ||||
| option(USE_ROCKSDB "Use RocksDB" OFF) | ||||
| option(USE_SNPE "Use Qualcomm's SNPE library" OFF) | ||||
| option(USE_SYSTEM_EIGEN_INSTALL | ||||
|     "Use system Eigen instead of the one under third_party" OFF) | ||||
| option(USE_TENSORRT "Using Nvidia TensorRT library" OFF) | ||||
| option(USE_ZMQ "Use ZMQ" OFF) | ||||
| option(USE_ZSTD "Use ZSTD" OFF) | ||||
| option(USE_MKLDNN "Use MKLDNN" OFF) | ||||
| option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON) | ||||
| option(USE_MKLML "Use MKLML interface in MKL BLAS" ON) | ||||
| option(USE_DISTRIBUTED "Use distributed" ON) | ||||
| cmake_dependent_option( | ||||
|     USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON | ||||
|     "USE_DISTRIBUTED" OFF) | ||||
| cmake_dependent_option( | ||||
|     USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON | ||||
|     "USE_DISTRIBUTED" OFF) | ||||
| cmake_dependent_option( | ||||
|     USE_GLOO_IBVERBS "Use Gloo IB verbs for distributed. Only available if USE_GLOO is on." OFF | ||||
|     "USE_GLOO" OFF) | ||||
| option(TORCH_USE_CEREAL "Build the C++ API with Cereal for serialization support" OFF) | ||||
|  | ||||
| # Used when building Caffe2 through setup.py | ||||
| option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF) | ||||
|  | ||||
| SET(ONNX_NAMESPACE "onnx_c2" CACHE STRING "onnx namespace") | ||||
|  | ||||
| if (ANDROID OR IOS) | ||||
|   set(BUILD_ATEN_MOBILE ON) | ||||
| endif() | ||||
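| # Editorial sketch, values illustrative rather than recommended: any option | ||||
| # above can be overridden at configure time, e.g. | ||||
| #   cmake -DUSE_CUDA=OFF -DBUILD_TEST=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo .. | ||||
| # When building through setup.py, related environment variables (such as | ||||
| # NO_CUDA=1, described in CONTRIBUTING.md) map onto these options instead. | ||||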
|  | ||||
| # ---[ CMake scripts + modules | ||||
| list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) | ||||
|  | ||||
| # ---[ CMake build directories | ||||
| set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | ||||
| set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | ||||
| set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) | ||||
|  | ||||
| enable_testing() | ||||
|  | ||||
| # ---[ Build variables set within the cmake tree | ||||
| include(cmake/BuildVariables.cmake) | ||||
| set(CAFFE2_WHITELIST "" CACHE STRING "A whitelist file of files that one should build.") | ||||
|  | ||||
| # Set default build type | ||||
| if(NOT CMAKE_BUILD_TYPE) | ||||
|     message(STATUS "Build type not set - defaulting to Release") | ||||
|     set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage." FORCE) | ||||
| endif() | ||||
|  | ||||
| # ---[ Misc checks to cope with various compiler modes | ||||
| include(cmake/MiscCheck.cmake) | ||||
|  | ||||
| # External projects | ||||
| include(ExternalProject) | ||||
|  | ||||
| # ---[ Utils | ||||
| # TODO: merge the following 3 files into cmake/public/utils.cmake. | ||||
| include(cmake/Utils.cmake) | ||||
| include(cmake/public/utils.cmake) | ||||
|  | ||||
| # ---[ Dependencies | ||||
| include(cmake/Dependencies.cmake) | ||||
|  | ||||
| # ---[ Whitelist file if whitelist is specified | ||||
| include(cmake/Whitelist.cmake) | ||||
|  | ||||
| # ---[ Set link flag, handle additional deps for gcc 4.8 and above | ||||
| if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.8.0 AND NOT ANDROID) | ||||
|   message(STATUS "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line") | ||||
|   list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc) | ||||
| endif() | ||||
|  | ||||
| # ---[ Build flags | ||||
| set(CMAKE_C_STANDARD 99) | ||||
| set(CMAKE_CXX_STANDARD 11) | ||||
| if(NOT MSVC) | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fPIC") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") | ||||
|   # Eigen fails to build with some compiler versions when narrowing is an | ||||
|   # error, so -Wno-narrowing above demotes it to a warning. | ||||
|   # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459 | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-field-initializers") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-type-limits") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-array-bounds") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-function") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-result") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-overflow") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-strict-aliasing") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations") | ||||
|   if (CMAKE_COMPILER_IS_GNUCXX AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0)) | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow") | ||||
|   endif() | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pedantic") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-decls") | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=old-style-cast") | ||||
|   # These flags are not available in GCC-4.8.5. Set only when using clang. | ||||
|   # Compared against https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/Option-Summary.html | ||||
|   if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-invalid-partial-specialization") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-typedef-redefinition") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-warning-option") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-inconsistent-missing-override") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-aligned-allocation-unavailable") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++14-extensions") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constexpr-not-const") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments") | ||||
|   endif() | ||||
|   if ((APPLE AND (NOT ("${CLANG_VERSION_STRING}" VERSION_LESS "9.0"))) | ||||
|     OR (CMAKE_COMPILER_IS_GNUCXX | ||||
|     AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0 AND NOT APPLE))) | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new") | ||||
|   endif() | ||||
|   if ($ENV{WERROR}) | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") | ||||
|   endif($ENV{WERROR}) | ||||
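|   # Editorial note: setting the WERROR environment variable at configure time | ||||
|   # (e.g. WERROR=1 python setup.py build develop) promotes the warnings left | ||||
|   # enabled above to errors, except those excluded via -Wno-error=* flags. | ||||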
|   if (NOT APPLE) | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized") | ||||
|   endif() | ||||
| else() | ||||
|   foreach(flag_var | ||||
|       CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE | ||||
|       CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) | ||||
|     if (${CAFFE2_USE_MSVC_STATIC_RUNTIME}) | ||||
|       if(${flag_var} MATCHES "/MD") | ||||
|         string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") | ||||
|       endif(${flag_var} MATCHES "/MD") | ||||
|     else() | ||||
|       if(${flag_var} MATCHES "/MT") | ||||
|         string(REGEX REPLACE "/MT" "/MD" ${flag_var} "${${flag_var}}") | ||||
|       endif() | ||||
|     endif() | ||||
|     # /bigobj increases number of sections in .obj file, which is needed to link | ||||
|     # against libraries in Python 2.7 under Windows | ||||
|     set(${flag_var} "${${flag_var}} /MP /bigobj") | ||||
|   endforeach(flag_var) | ||||
| endif() | ||||
|  | ||||
| set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-omit-frame-pointer -O0") | ||||
| set (CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_STATIC_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -O0") | ||||
| if (USE_ASAN) | ||||
|     set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address") | ||||
|     set (CMAKE_LINKER_FLAGS_DEBUG "${CMAKE_STATIC_LINKER_FLAGS_DEBUG} -fsanitize=address") | ||||
| endif() | ||||
|  | ||||
| if (APPLE) | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-braces") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++14-extensions") | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constexpr-not-const") | ||||
| endif() | ||||
|  | ||||
| if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0.0) | ||||
|   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow") | ||||
| endif() | ||||
|  | ||||
| if(ANDROID) | ||||
|   if(CMAKE_COMPILER_IS_GNUCXX) | ||||
|     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s") | ||||
|   else() | ||||
|     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s") | ||||
|   endif() | ||||
| endif() | ||||
|  | ||||
| if(NOT APPLE AND UNIX) | ||||
|   list(APPEND Caffe2_DEPENDENCY_LIBS dl) | ||||
| endif() | ||||
|  | ||||
| # Prefix path to Caffe2 headers. | ||||
| # If a directory containing installed Caffe2 headers was inadvertently | ||||
| # added to the list of include directories, prefixing | ||||
| # PROJECT_SOURCE_DIR means this source tree always takes precedence. | ||||
| include_directories(BEFORE ${PROJECT_SOURCE_DIR}) | ||||
|  | ||||
| # Prefix path to generated Caffe2 headers. | ||||
| # These need to take precedence over their empty counterparts located | ||||
| # in PROJECT_SOURCE_DIR. | ||||
| include_directories(BEFORE ${PROJECT_BINARY_DIR}) | ||||
|  | ||||
| include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/) | ||||
|  | ||||
| # ---[ Main build | ||||
| add_subdirectory(caffe2) | ||||
|  | ||||
| # ---[ Documentation | ||||
| if(BUILD_DOCS) | ||||
|   # check if Doxygen is installed | ||||
|   find_package(Doxygen) | ||||
|   if (DOXYGEN_FOUND) | ||||
|     message("Generating documentation") | ||||
|  | ||||
|     set(DOXYGEN_C_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/.Doxyfile-c) | ||||
|     set(DOXYGEN_C_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/Doxyfile-c) | ||||
|     set(DOXYGEN_P_IN ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/.Doxyfile-python) | ||||
|     set(DOXYGEN_P_OUT ${CMAKE_CURRENT_SOURCE_DIR}/docs/caffe2/Doxyfile-python) | ||||
|  | ||||
|     if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/docs) | ||||
|       file(REMOVE_RECURSE ${CMAKE_CURRENT_BINARY_DIR}/docs) | ||||
|     endif() | ||||
|  | ||||
|     file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/docs) | ||||
|     configure_file(${DOXYGEN_C_IN} ${DOXYGEN_C_OUT} @ONLY) | ||||
|     configure_file(${DOXYGEN_P_IN} ${DOXYGEN_P_OUT} @ONLY) | ||||
|  | ||||
|     add_custom_target(doc_doxygen_c ALL | ||||
|         COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_C_OUT} | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         COMMENT "Generating C++ API documentation with Doxygen" | ||||
|         VERBATIM) | ||||
|  | ||||
|     add_custom_target(doc_doxygen_python ALL | ||||
|         COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_P_OUT} | ||||
|         WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} | ||||
|         COMMENT "Generating Python API documentation with Doxygen" | ||||
|         VERBATIM) | ||||
|   else() | ||||
|     message(FATAL_ERROR "Doxygen needs to be installed to generate the documentation") | ||||
|   endif() | ||||
| endif() | ||||
|  | ||||
| # ---[ CMake related files | ||||
| # Uninstall option. | ||||
| if(NOT TARGET caffe2_uninstall) | ||||
|   configure_file( | ||||
|       ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in | ||||
|       ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake | ||||
|       IMMEDIATE @ONLY) | ||||
|  | ||||
|   add_custom_target(caffe2_uninstall | ||||
|       COMMAND ${CMAKE_COMMAND} -P | ||||
|       ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) | ||||
| endif() | ||||
|  | ||||
| # ---[ Make configuration files for cmake to allow dependent libraries | ||||
| # easier access to Caffe2. | ||||
|  | ||||
| if ((NOT USE_GLOG) OR (NOT USE_GFLAGS) OR BUILD_CUSTOM_PROTOBUF) | ||||
|   message(WARNING | ||||
|       "Generated cmake files are only fully tested if one builds " | ||||
|       "with system glog, gflags, and protobuf. Other settings may " | ||||
|       "generate files that are not well tested.") | ||||
| endif() | ||||
|  | ||||
| if (USE_CUDA OR USE_ROCM) | ||||
|   # TODO: check if we should include other cuda dependency libraries | ||||
|   # to the interface as well. | ||||
|  | ||||
| endif() | ||||
|  | ||||
| # Note(jiayq): when building static libraries, all PRIVATE dependencies | ||||
| # will also become interface libraries, and as a result if there are any | ||||
| # dependency libraries that are not exported, the following install export | ||||
| # script will fail. As a result, we will only provide the targets cmake | ||||
| # files for shared lib installation. For more info, read: | ||||
| # https://cmake.org/pipermail/cmake/2016-May/063400.html | ||||
| if (BUILD_SHARED_LIBS) | ||||
|   configure_file( | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/Caffe2ConfigVersion.cmake.in | ||||
|       ${PROJECT_BINARY_DIR}/Caffe2ConfigVersion.cmake | ||||
|       @ONLY) | ||||
|   configure_file( | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in | ||||
|       ${PROJECT_BINARY_DIR}/Caffe2Config.cmake | ||||
|       @ONLY) | ||||
|   install(FILES | ||||
|       ${PROJECT_BINARY_DIR}/Caffe2ConfigVersion.cmake | ||||
|       ${PROJECT_BINARY_DIR}/Caffe2Config.cmake | ||||
|       DESTINATION share/cmake/Caffe2 | ||||
|       COMPONENT dev) | ||||
|   install(FILES | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/threads.cmake | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake | ||||
|       DESTINATION share/cmake/Caffe2/public | ||||
|       COMPONENT dev) | ||||
|   install(DIRECTORY | ||||
|       ${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix | ||||
|       DESTINATION share/cmake/Caffe2/ | ||||
|       COMPONENT dev) | ||||
|   install(EXPORT Caffe2Targets DESTINATION share/cmake/Caffe2 | ||||
|       FILE Caffe2Targets.cmake | ||||
|       COMPONENT dev) | ||||
| else() | ||||
|   message(WARNING | ||||
|       "Generated cmake files are only available when building " | ||||
|       "shared libs.") | ||||
| endif() | ||||
|  | ||||
| # ---[ Modules | ||||
| add_subdirectory(modules) | ||||
|  | ||||
| # ---[ Binaries | ||||
| # Binaries will be built after the Caffe2 main libraries and the modules | ||||
| # are built. For the binaries, they will be linked to the Caffe2 main | ||||
| # libraries, as well as all the modules that are built with Caffe2 (the ones | ||||
| # built in the previous Modules section above). | ||||
| if (BUILD_BINARY) | ||||
|   add_subdirectory(binaries) | ||||
| endif() | ||||
|  | ||||
| include(cmake/Summary.cmake) | ||||
| caffe2_print_configuration_summary() | ||||
CODEOWNERS (25 lines)
| @ -1,25 +0,0 @@ | ||||
| # This is a comment. | ||||
| # Each line is a file pattern followed by one or more owners. | ||||
|  | ||||
| /aten/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /aten/src/ATen/core/ | ||||
| /torch/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /docs/source @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ssnl @zou3519 | ||||
| /docs/cpp @goldsborough @ebetica @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /test @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /tools @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /README.md @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /setup.py @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /requirements.txt @apaszke @soumith @colesbury @gchanan @zdevito @ezyang | ||||
| /torch/csrc/api/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ebetica @goldsborough | ||||
| /test/cpp/api/ @apaszke @soumith @colesbury @gchanan @zdevito @ezyang @ebetica @goldsborough | ||||
| /torch/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing | ||||
| /torch/csrc/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing | ||||
| /torch/csrc/jit/passes/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing | ||||
| /test/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing | ||||
| /scripts/onnx/ @anderspapitto @bddppq @dzhulgakov @ezyang @houseroad @jamesr66a @smessmer @Yangqing | ||||
| /torch/lib/c10d/ @apaszke @pietern @teng-li | ||||
| /torch/csrc/distributed/ @apaszke @pietern @teng-li | ||||
| /torch/distributed/ @apaszke @pietern @teng-li | ||||
| /test/test_c10d.py @apaszke @pietern @teng-li | ||||
| /torch/utils/cpp_extension.py @goldsborough @fmassa @apaszke @soumith @ezyang | ||||
CONTRIBUTING.md (222 lines)
| @ -19,18 +19,18 @@ If you are not familiar with creating a Pull Request, here are some guides: | ||||
| - https://help.github.com/articles/creating-a-pull-request/ | ||||
|  | ||||
|  | ||||
| ## Developing PyTorch | ||||
| ## Developing locally with PyTorch | ||||
|  | ||||
| To develop PyTorch on your machine, here are some tips: | ||||
| To locally develop with PyTorch, here are some tips: | ||||
|  | ||||
| 1. Uninstall all existing PyTorch installs: | ||||
| 1. Uninstall all existing pytorch installs | ||||
| ``` | ||||
| conda uninstall pytorch | ||||
| pip uninstall torch | ||||
| pip uninstall torch # run this command twice | ||||
| ``` | ||||
|  | ||||
| 2. Clone a copy of PyTorch from source: | ||||
| 2. Locally clone a copy of PyTorch from source: | ||||
|  | ||||
| ``` | ||||
| git clone https://github.com/pytorch/pytorch | ||||
| @ -72,31 +72,6 @@ For example: | ||||
|  | ||||
| You do not need to repeatedly install after modifying python files. | ||||
|  | ||||
| In case you want to reinstall, make sure that you uninstall pytorch first by running `pip uninstall torch` | ||||
| and `python setup.py clean`. Then you can install in `build develop` mode again. | ||||
|  | ||||
| ## Unit testing | ||||
|  | ||||
| PyTorch's testing is located under `test/`. Run the entire test suite with | ||||
|  | ||||
| ``` | ||||
| python test/run_test.py | ||||
| ``` | ||||
|  | ||||
| or run individual test files, like `python test/test_nn.py`, for individual test suites. | ||||
|  | ||||
| ### Better local unit tests with pytest | ||||
| We don't officially support `pytest`, but it works well with our `unittest` tests and offers | ||||
| a number of useful features for local development. Install it via `pip install pytest`. | ||||
|  | ||||
| If you want to just run tests that contain a specific substring, you can use the `-k` flag: | ||||
|  | ||||
| ``` | ||||
| pytest test/test_nn.py -k Loss -v | ||||
| ``` | ||||
|  | ||||
| The above is an example of testing a change to Loss functions: this command runs tests such as | ||||
| `TestNN.test_BCELoss` and `TestNN.test_MSELoss` and can be useful to save keystrokes. | ||||
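|  | ||||
| For a single known test, `pytest` also accepts an exact node id, e.g. | ||||
| `pytest test/test_nn.py::TestNN::test_BCELoss -v` (file path, class name, and | ||||
| test name joined by `::`). | ||||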
|  | ||||
| ## Writing documentation | ||||
|  | ||||
| @ -104,18 +79,6 @@ PyTorch uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/late | ||||
| for formatting docstrings. Lines inside a docstring block must be limited to 80 characters so | ||||
| that they fit into Jupyter documentation popups. | ||||
|  | ||||
| For C++ documentation (https://pytorch.org/cppdocs), we use | ||||
| [Doxygen](http://www.doxygen.nl/) and then convert it to | ||||
| [Sphinx](http://www.sphinx-doc.org/) via | ||||
| [Breathe](https://github.com/michaeljones/breathe) and | ||||
| [Exhale](https://github.com/svenevs/exhale). Check the [Doxygen | ||||
| reference](http://www.stack.nl/~dimitri/doxygen/manual/index.html) for more | ||||
| information on the documentation syntax. To build the documentation locally, | ||||
| `cd` into `docs/cpp` and then `make html`. | ||||
|  | ||||
| We run Doxygen in CI (Travis) to verify that you do not use invalid Doxygen | ||||
| commands. To run this check locally, run `./check-doxygen.sh` from inside | ||||
| `docs/cpp`. | ||||
|  | ||||
| ## Managing multiple build trees | ||||
|  | ||||
| @ -151,20 +114,19 @@ not very optimized for incremental rebuilds, this will actually be very slow. | ||||
| Far better is to only request rebuilds of the parts of the project you are | ||||
| working on: | ||||
|  | ||||
| - Working on the Python bindings?  Run `python setup.py develop` to rebuild | ||||
| - Working on `torch/csrc`?  Run `python setup.py develop` to rebuild | ||||
|   (NB: no `build` here!) | ||||
|  | ||||
| - Working on `torch/csrc` or `aten`?  Run `python setup.py rebuild_libtorch` to | ||||
|   rebuild and avoid having to rebuild other dependent libraries we | ||||
|   depend on. | ||||
| - Working on `torch/lib/TH`, did not make any cmake changes, and just want to | ||||
|   see if it compiles?  Run `(cd torch/lib/build/TH && make install -j$(getconf _NPROCESSORS_ONLN))`.  This | ||||
|   applies for any other subdirectory of `torch/lib`.  **Warning: Changes you | ||||
|   make here will not be visible from Python.**  See below. | ||||
|  | ||||
| - Working on one of the other dependent libraries? The other valid | ||||
|   targets are listed in `dep_libs` in `setup.py`. Prepend `build_` to | ||||
|   get a target, and run as e.g. `python setup.py build_gloo`. | ||||
|  | ||||
| - Working on a test binary?  Run `(cd build && ninja bin/test_binary_name)` to | ||||
|   rebuild only that test binary (without rerunning cmake).  (Replace `ninja` with | ||||
|   `make` if you don't have ninja installed). | ||||
| - Working on `torch/lib` and want to run your changes / rerun cmake?  Run | ||||
|   `python setup.py build_deps`.  Note that this will rerun cmake for | ||||
|   every subdirectory in TH; if you are only working on one project, | ||||
|   consider editing `torch/lib/build_all.sh` and commenting out the | ||||
|   `build` lines of libraries you are not working on. | ||||
|  | ||||
| On the initial build, you can also speed things up with the environment | ||||
| variables `DEBUG` and `NO_CUDA`. | ||||
| @ -179,31 +141,10 @@ NO_CUDA=1 DEBUG=1 python setup.py build develop | ||||
|  | ||||
| Make sure you continue to pass these flags on subsequent builds. | ||||
|  | ||||
| ### Code completion and IDE support | ||||
|  | ||||
| When using `python setup.py develop`, PyTorch will generate | ||||
| a `compile_commands.json` file that can be used by many editors | ||||
| to provide command completion and error highlighting for PyTorch's | ||||
| C++ code. You need to `pip install ninja` to generate accurate | ||||
| information for the code in `torch/csrc`. More information at: | ||||
| - https://sarcasm.github.io/notes/dev/compilation-database.html | ||||
|  | ||||
| ### Make no-op build fast. | ||||
|  | ||||
| #### Use Ninja | ||||
| Python `setuptools` is pretty dumb, and always rebuilds every C file in a | ||||
| project.  If you install the ninja build system with `pip install ninja`, | ||||
| then PyTorch will use it to track dependencies correctly. | ||||
| If pytorch was already built, you will need to run `python setup.py clean` once | ||||
| after installing ninja for builds to succeed. | ||||
|  | ||||
| #### Use CCache | ||||
|  | ||||
| Even when dependencies are tracked with file modification, | ||||
| there are many situations where files get rebuilt when a previous | ||||
| compilation was exactly the same. | ||||
|  | ||||
| Using ccache in a situation like this is a real time-saver. However, by | ||||
| project. Using ccache in a situation like this is a real time-saver. However, by | ||||
| default, ccache does not properly support CUDA stuff, so here are the | ||||
| instructions for installing a custom `ccache` fork that has CUDA support: | ||||
|  | ||||
| @ -244,136 +185,11 @@ export CUDA_NVCC_EXECUTABLE=~/ccache/cuda/nvcc | ||||
|  | ||||
| If you are working on the CUDA code, here are some useful CUDA debugging tips: | ||||
|  | ||||
| 1. `CUDA_DEVICE_DEBUG=1` will enable CUDA device function debug symbols (`-g -G`). | ||||
|     This will be particularly helpful in debugging device code. However, it will | ||||
|     slow down the build process by about 50% (compared to only `DEBUG=1`), so use wisely. | ||||
| 2. `cuda-gdb` and `cuda-memcheck` are your best CUDA debugging friends. Unlike `gdb`, | ||||
| 1. `CUDA_DEBUG=1` will enable CUDA debugging symbols (-g -G). This is particularly | ||||
|     helpful in debugging device code. However, it will slow down the build process, | ||||
|     so use wisely. | ||||
| 2. `cuda-gdb` and `cuda-memcheck` are your best CUDA debugging friends. Unlike `gdb`, | ||||
|    `cuda-gdb` can display actual values in a CUDA tensor (rather than all zeros). | ||||
|  | ||||
|  | ||||
| Hope this helps, and thanks for considering contributing. | ||||
|  | ||||
| ## Windows development tips | ||||
|  | ||||
| Occasionally, you will write a patch which works on Linux, but fails CI on Windows. | ||||
| There are a few aspects in which MSVC (the Windows compiler toolchain we use) is stricter | ||||
| than Linux, which are worth keeping in mind when fixing these problems. | ||||
|  | ||||
| 1. Symbols are NOT exported by default on Windows; instead, you have to explicitly | ||||
|    mark a symbol as exported/imported in a header file with `__declspec(dllexport)` / | ||||
|    `__declspec(dllimport)`.  We have codified this pattern into a set of macros | ||||
|    which follow the convention `*_API`, e.g., `AT_API` inside ATen. (Every separate | ||||
|    shared library needs a unique macro name, because symbol visibility is on a per | ||||
|    shared library basis.)  A minimal sketch of this macro pattern appears right after this list. | ||||
|  | ||||
|    The upshot is if you see an "unresolved external" error in your Windows build, this | ||||
|    is probably because you forgot to mark a function with `*_API`.  However, there is | ||||
|    one important counterexample to this principle: if you want a *templated* function | ||||
|    to be instantiated at the call site, do NOT mark it with `*_API` (if you do mark it, | ||||
|    you'll have to explicitly instantiate all of the specializations used by the call | ||||
|    sites.) | ||||
|  | ||||
| 2. If you link against a library, this does not make its dependencies transitively | ||||
|    visible. You must explicitly specify a link dependency against every library whose | ||||
|    symbols you use.  (This is different from Linux where in most environments, | ||||
|    transitive dependencies can be used to fulfill unresolved symbols.) | ||||
|  | ||||
| 3. If you have a Windows box (we have a few on EC2 which you can request access to) and | ||||
|    you want to run the build, the easiest way is to just run `.jenkins/pytorch/win-build.sh`. | ||||
|    If you need to rebuild, run `REBUILD=1 .jenkins/pytorch/win-build.sh` (this will avoid | ||||
|    blowing away your Conda environment.) | ||||
|  | ||||
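| The `*_API` macro pattern itself is small. Below is a minimal sketch; the | ||||
| `MYLIB_*` names are made up for illustration, and each real library defines | ||||
| its own macro name and build-time define: | ||||
|  | ||||
| ``` | ||||
| // mylib_api.h -- hypothetical sketch of the *_API export-macro pattern. | ||||
| // When building mylib itself, the build defines MYLIB_BUILD_MAIN_LIB, so | ||||
| // marked symbols are exported; consumers of the DLL get dllimport instead. | ||||
| #ifdef _WIN32 | ||||
| #ifdef MYLIB_BUILD_MAIN_LIB | ||||
| #define MYLIB_API __declspec(dllexport) | ||||
| #else | ||||
| #define MYLIB_API __declspec(dllimport) | ||||
| #endif | ||||
| #else | ||||
| #define MYLIB_API  // non-Windows toolchains export symbols by default | ||||
| #endif | ||||
|  | ||||
| MYLIB_API int mylib_add(int a, int b);  // visible across the DLL boundary | ||||
|  | ||||
| template <typename T> | ||||
| T mylib_square(T x) { return x * x; }  // NB: templates are left unmarked | ||||
| ``` | ||||
|  | ||||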
| Even if you don't know anything about MSVC, you can use cmake to build simple programs on | ||||
| Windows; this can be helpful if you want to learn more about some peculiar linking behavior | ||||
| by reproducing it on a small example.  Here's a simple example cmake file that defines | ||||
| two dynamic libraries, one linking with the other: | ||||
|  | ||||
| ``` | ||||
| project(myproject CXX) | ||||
| set(CMAKE_CXX_STANDARD 11) | ||||
| add_library(foo SHARED foo.cpp) | ||||
| add_library(bar SHARED bar.cpp) | ||||
| # NB: don't forget to __declspec(dllexport) at least one symbol from foo, | ||||
| # otherwise foo.lib will not be created. | ||||
| target_link_libraries(bar PUBLIC foo) | ||||
| ``` | ||||
|  | ||||
| You can build it with: | ||||
|  | ||||
| ``` | ||||
| mkdir build | ||||
| cd build | ||||
| cmake .. | ||||
| cmake --build . | ||||
| ``` | ||||
|  | ||||
| ### Known MSVC (and MSVC with NVCC) bugs | ||||
|  | ||||
| The PyTorch codebase sometimes likes to use exciting C++ features, and | ||||
| these exciting features lead to exciting bugs in Windows compilers. | ||||
| To add insult to injury, the error messages will often not tell you | ||||
| which line of code actually induced the erroring template instantiation. | ||||
|  | ||||
| I've found the most effective way to debug these problems is to | ||||
| carefully read over diffs, keeping in mind known bugs in MSVC/NVCC. | ||||
| Here are a few well known pitfalls and workarounds: | ||||
|  | ||||
| * This is not actually a bug per se, but in general, code generated by MSVC | ||||
|   is more sensitive to memory errors; you may have written some code | ||||
|   that does a use-after-free or overflows the stack; on Linux the code | ||||
|   might work, but on Windows your program will crash.  ASAN may not | ||||
|   catch all of these problems: stay vigilant to the possibility that | ||||
|   your crash is due to a real memory problem. | ||||
|  | ||||
| * (NVCC) `at::optional` does not work when used from device code.  Don't use | ||||
|   it from kernels.  Upstream issue: https://github.com/akrzemi1/Optional/issues/58 | ||||
|   and our local issue #10329. | ||||
|  | ||||
| * `constexpr` generally works less well on MSVC. | ||||
|  | ||||
|   * The idiom `static_assert(f() == f())` to test if `f` is constexpr | ||||
|     does not work; you'll get "error C2131: expression did not evaluate | ||||
|     to a constant".  Don't use these asserts on Windows. | ||||
|     (Example: `aten/src/ATen/core/intrusive_ptr.h`) | ||||
|  | ||||
| * (NVCC) Code you access inside a `static_assert` will eagerly be | ||||
|   evaluated as if it were device code, and so you might get an error | ||||
|   that the code is "not accessible". | ||||
|  | ||||
| ``` | ||||
| class A { | ||||
|   static A singleton_; | ||||
|   static constexpr inline A* singleton() { | ||||
|     return &singleton_; | ||||
|   } | ||||
| }; | ||||
| static_assert(std::is_same<A*, decltype(A::singleton())>::value, "hmm"); | ||||
| ``` | ||||
|  | ||||
| * The compiler will run out of heap if you attempt to compile files that | ||||
|   are too large.  Splitting such files into separate files helps. | ||||
|   (Example: `THTensorMath`, `THTensorMoreMath`, `THTensorEvenMoreMath`.) | ||||
|  | ||||
| ## Caffe2 notes | ||||
|  | ||||
| In 2018, we merged Caffe2 into the PyTorch source repository.  While the | ||||
| steady state aspiration is that Caffe2 and PyTorch share code freely, | ||||
| in the meantime there will be some separation. | ||||
|  | ||||
| If you submit a PR to only PyTorch or only Caffe2 code, CI will only | ||||
| run for the project you edited.  The logic for this is implemented | ||||
| in `.jenkins/pytorch/dirty.sh` and `.jenkins/caffe2/dirty.sh`; you | ||||
| can look at this to see what path prefixes constitute changes. | ||||
| This also means if you ADD a new top-level path, or you start | ||||
| sharing code between projects, you need to modify these files. | ||||
|  | ||||
| There are a few "unusual" directories which, for historical reasons, | ||||
| are Caffe2/PyTorch specific.  Here they are: | ||||
|  | ||||
| - `CMakeLists.txt`, `Makefile`, `binaries`, `cmake`, `conda`, `modules`, | ||||
|   `scripts` are Caffe2-specific.  Don't put PyTorch code in them without | ||||
|   extra coordination. | ||||
|  | ||||
| - `mypy*`, `requirements.txt`, `setup.py`, `test`, `tools` are | ||||
|   PyTorch-specific.  Don't put Caffe2 code in them without extra | ||||
|   coordination. | ||||
|  | ||||
Dockerfile (41 lines, new file)
| @ -0,0 +1,41 @@ | ||||
| FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04  | ||||
|  | ||||
| RUN echo "deb http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|          build-essential \ | ||||
|          cmake \ | ||||
|          git \ | ||||
|          curl \ | ||||
|          vim \ | ||||
|          ca-certificates \ | ||||
|          libnccl2=2.0.5-2+cuda8.0 \ | ||||
|          libnccl-dev=2.0.5-2+cuda8.0 \ | ||||
|          libjpeg-dev \ | ||||
|          libpng-dev &&\ | ||||
|      rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
|  | ||||
| ENV PYTHON_VERSION=3.6 | ||||
| RUN curl -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \ | ||||
|      chmod +x ~/miniconda.sh && \ | ||||
|      ~/miniconda.sh -b -p /opt/conda && \      | ||||
|      rm ~/miniconda.sh && \ | ||||
| #     /opt/conda/bin/conda install conda-build && \ | ||||
|      /opt/conda/bin/conda create -y --name pytorch-py$PYTHON_VERSION python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl && \ | ||||
|      /opt/conda/bin/conda clean -ya  | ||||
| ENV PATH /opt/conda/envs/pytorch-py$PYTHON_VERSION/bin:$PATH | ||||
| RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda80 | ||||
| # This must be done before pip so that requirements.txt is available | ||||
| WORKDIR /opt/pytorch | ||||
| COPY . . | ||||
|  | ||||
| RUN git submodule update --init | ||||
| RUN TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ | ||||
|     CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ | ||||
|     pip install -v . | ||||
|  | ||||
| RUN git clone https://github.com/pytorch/vision.git && cd vision && pip install -v . | ||||
|  | ||||
| WORKDIR /workspace | ||||
| RUN chmod -R a+w /workspace | ||||
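|  | ||||
| # Editorial usage sketch, not part of the original file; the image tag is | ||||
| # arbitrary. From the repository root: | ||||
| #   docker build -t pytorch . | ||||
| #   nvidia-docker run --rm -it pytorch python -c "import torch; print(torch.__version__)" | ||||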
LICENSE (32 lines)
| @ -1,5 +1,3 @@ | ||||
| From PyTorch: | ||||
|  | ||||
| Copyright (c) 2016-     Facebook, Inc            (Adam Paszke) | ||||
| Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala) | ||||
| Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) | ||||
| @ -10,36 +8,6 @@ Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, | ||||
| Copyright (c) 2006      Idiap Research Institute (Samy Bengio) | ||||
| Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) | ||||
|  | ||||
| From Caffe2: | ||||
|  | ||||
| Copyright (c) 2016-present, Facebook Inc. All rights reserved. | ||||
|  | ||||
| All contributions by Facebook: | ||||
| Copyright (c) 2016 Facebook Inc. | ||||
|   | ||||
| All contributions by Google: | ||||
| Copyright (c) 2015 Google Inc. | ||||
| All rights reserved. | ||||
|   | ||||
| All contributions by Yangqing Jia: | ||||
| Copyright (c) 2015 Yangqing Jia | ||||
| All rights reserved. | ||||
|   | ||||
| All contributions from Caffe: | ||||
| Copyright(c) 2013, 2014, 2015, the respective contributors | ||||
| All rights reserved. | ||||
|   | ||||
| All other contributions: | ||||
| Copyright(c) 2015, 2016 the respective contributors | ||||
| All rights reserved. | ||||
|   | ||||
| Caffe2 uses a copyright model similar to Caffe: each contributor holds | ||||
| copyright over their contributions to Caffe2. The project versioning records | ||||
| all such contribution and copyright details. If a contributor wants to further | ||||
| mark their specific copyright on a particular contribution, they should | ||||
| indicate their copyright solely in the commit message of the change when it is | ||||
| committed. | ||||
|  | ||||
| All rights reserved. | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without | ||||
|  | ||||
Makefile (21 lines)
| @ -1,21 +0,0 @@ | ||||
| # This makefile does nothing but delegating the actual building to cmake. | ||||
|  | ||||
| all: | ||||
| 	@mkdir -p build && cd build && cmake .. $(shell python ./scripts/get_python_cmake_flags.py) && $(MAKE) | ||||
|  | ||||
| local: | ||||
| 	@./scripts/build_local.sh | ||||
|  | ||||
| android: | ||||
| 	@./scripts/build_android.sh | ||||
|  | ||||
| ios: | ||||
| 	@./scripts/build_ios.sh | ||||
|  | ||||
| clean: # This will remove ALL build folders. | ||||
| 	@rm -r build*/ | ||||
|  | ||||
| linecount: | ||||
| 	@cloc --read-lang-def=caffe.cloc caffe2 || \ | ||||
| 		(echo "Cloc is not available on the machine. You can install cloc with "; \ | ||||
| 		echo "    sudo apt-get install cloc") | ||||
NOTICE (309 lines)
| @ -1,309 +0,0 @@ | ||||
| ======================================================================= | ||||
| Software under third_party | ||||
| ======================================================================= | ||||
| Software libraries under third_party are provided as github submodule | ||||
| links, and their content is not part of the Caffe2 codebase. Their | ||||
| licences can be found under the respective software repositories. | ||||
|  | ||||
| ======================================================================= | ||||
| Earlier BSD License | ||||
| ======================================================================= | ||||
| Early development of Caffe2 in 2015 and early 2016 is licensed under the | ||||
| BSD license. The license is attached below: | ||||
|  | ||||
| All contributions by Facebook: | ||||
| Copyright (c) 2016 Facebook Inc. | ||||
|  | ||||
| All contributions by Google: | ||||
| Copyright (c) 2015 Google Inc. | ||||
| All rights reserved. | ||||
|  | ||||
| All contributions by Yangqing Jia: | ||||
| Copyright (c) 2015 Yangqing Jia | ||||
| All rights reserved. | ||||
|  | ||||
| All other contributions: | ||||
| Copyright(c) 2015, 2016 the respective contributors | ||||
| All rights reserved. | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are met: | ||||
|  | ||||
| 1. Redistributions of source code must retain the above copyright notice, this | ||||
|    list of conditions and the following disclaimer. | ||||
| 2. Redistributions in binary form must reproduce the above copyright notice, | ||||
|    this list of conditions and the following disclaimer in the documentation | ||||
|    and/or other materials provided with the distribution. | ||||
|  | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||||
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||||
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||||
| ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
|  | ||||
|  | ||||
| ======================================================================= | ||||
| Caffe's BSD License | ||||
| ======================================================================= | ||||
| Some parts of the caffe2 code are derived from the original Caffe code, which is | ||||
| created by Yangqing Jia and is now a BSD-licensed open-source project. The Caffe | ||||
| license is as follows: | ||||
|  | ||||
| COPYRIGHT | ||||
|  | ||||
| All contributions by the University of California: | ||||
| Copyright (c) 2014, The Regents of the University of California (Regents) | ||||
| All rights reserved. | ||||
|  | ||||
| All other contributions: | ||||
| Copyright (c) 2014, the respective contributors | ||||
| All rights reserved. | ||||
|  | ||||
| Caffe uses a shared copyright model: each contributor holds copyright over | ||||
| their contributions to Caffe. The project versioning records all such | ||||
| contribution and copyright details. If a contributor wants to further mark | ||||
| their specific copyright on a particular contribution, they should indicate | ||||
| their copyright solely in the commit message of the change when it is | ||||
| committed. | ||||
|  | ||||
| LICENSE | ||||
|  | ||||
| Redistribution and use in source and binary forms, with or without | ||||
| modification, are permitted provided that the following conditions are met: | ||||
|  | ||||
| 1. Redistributions of source code must retain the above copyright notice, this | ||||
|    list of conditions and the following disclaimer. | ||||
| 2. Redistributions in binary form must reproduce the above copyright notice, | ||||
|    this list of conditions and the following disclaimer in the documentation | ||||
|    and/or other materials provided with the distribution. | ||||
|  | ||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||||
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||||
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||||
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||||
| ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
|  | ||||
| CONTRIBUTION AGREEMENT | ||||
|  | ||||
| By contributing to the BVLC/caffe repository through pull-request, comment, | ||||
| or otherwise, the contributor releases their content to the | ||||
| license and copyright terms herein. | ||||
|  | ||||
| ======================================================================= | ||||
| Caffe2's Apache License | ||||
| ======================================================================= | ||||
|  | ||||
| This repo contains Caffe2 code, which was previously licensed under | ||||
| Apache License Version 2.0: | ||||
|  | ||||
|                                  Apache License | ||||
|                            Version 2.0, January 2004 | ||||
|                         http://www.apache.org/licenses/ | ||||
|  | ||||
|    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | ||||
|  | ||||
|    1. Definitions. | ||||
|  | ||||
|       "License" shall mean the terms and conditions for use, reproduction, | ||||
|       and distribution as defined by Sections 1 through 9 of this document. | ||||
|  | ||||
|       "Licensor" shall mean the copyright owner or entity authorized by | ||||
|       the copyright owner that is granting the License. | ||||
|  | ||||
|       "Legal Entity" shall mean the union of the acting entity and all | ||||
|       other entities that control, are controlled by, or are under common | ||||
|       control with that entity. For the purposes of this definition, | ||||
|       "control" means (i) the power, direct or indirect, to cause the | ||||
|       direction or management of such entity, whether by contract or | ||||
|       otherwise, or (ii) ownership of fifty percent (50%) or more of the | ||||
|       outstanding shares, or (iii) beneficial ownership of such entity. | ||||
|  | ||||
|       "You" (or "Your") shall mean an individual or Legal Entity | ||||
|       exercising permissions granted by this License. | ||||
|  | ||||
|       "Source" form shall mean the preferred form for making modifications, | ||||
|       including but not limited to software source code, documentation | ||||
|       source, and configuration files. | ||||
|  | ||||
|       "Object" form shall mean any form resulting from mechanical | ||||
|       transformation or translation of a Source form, including but | ||||
|       not limited to compiled object code, generated documentation, | ||||
|       and conversions to other media types. | ||||
|  | ||||
|       "Work" shall mean the work of authorship, whether in Source or | ||||
|       Object form, made available under the License, as indicated by a | ||||
|       copyright notice that is included in or attached to the work | ||||
|       (an example is provided in the Appendix below). | ||||
|  | ||||
|       "Derivative Works" shall mean any work, whether in Source or Object | ||||
|       form, that is based on (or derived from) the Work and for which the | ||||
|       editorial revisions, annotations, elaborations, or other modifications | ||||
|       represent, as a whole, an original work of authorship. For the purposes | ||||
|       of this License, Derivative Works shall not include works that remain | ||||
|       separable from, or merely link (or bind by name) to the interfaces of, | ||||
|       the Work and Derivative Works thereof. | ||||
|  | ||||
|       "Contribution" shall mean any work of authorship, including | ||||
|       the original version of the Work and any modifications or additions | ||||
|       to that Work or Derivative Works thereof, that is intentionally | ||||
|       submitted to Licensor for inclusion in the Work by the copyright owner | ||||
|       or by an individual or Legal Entity authorized to submit on behalf of | ||||
|       the copyright owner. For the purposes of this definition, "submitted" | ||||
|       means any form of electronic, verbal, or written communication sent | ||||
|       to the Licensor or its representatives, including but not limited to | ||||
|       communication on electronic mailing lists, source code control systems, | ||||
|       and issue tracking systems that are managed by, or on behalf of, the | ||||
|       Licensor for the purpose of discussing and improving the Work, but | ||||
|       excluding communication that is conspicuously marked or otherwise | ||||
|       designated in writing by the copyright owner as "Not a Contribution." | ||||
|  | ||||
|       "Contributor" shall mean Licensor and any individual or Legal Entity | ||||
|       on behalf of whom a Contribution has been received by Licensor and | ||||
|       subsequently incorporated within the Work. | ||||
|  | ||||
|    2. Grant of Copyright License. Subject to the terms and conditions of | ||||
|       this License, each Contributor hereby grants to You a perpetual, | ||||
|       worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||||
|       copyright license to reproduce, prepare Derivative Works of, | ||||
|       publicly display, publicly perform, sublicense, and distribute the | ||||
|       Work and such Derivative Works in Source or Object form. | ||||
|  | ||||
|    3. Grant of Patent License. Subject to the terms and conditions of | ||||
|       this License, each Contributor hereby grants to You a perpetual, | ||||
|       worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||||
|       (except as stated in this section) patent license to make, have made, | ||||
|       use, offer to sell, sell, import, and otherwise transfer the Work, | ||||
|       where such license applies only to those patent claims licensable | ||||
|       by such Contributor that are necessarily infringed by their | ||||
|       Contribution(s) alone or by combination of their Contribution(s) | ||||
|       with the Work to which such Contribution(s) was submitted. If You | ||||
|       institute patent litigation against any entity (including a | ||||
|       cross-claim or counterclaim in a lawsuit) alleging that the Work | ||||
|       or a Contribution incorporated within the Work constitutes direct | ||||
|       or contributory patent infringement, then any patent licenses | ||||
|       granted to You under this License for that Work shall terminate | ||||
|       as of the date such litigation is filed. | ||||
|  | ||||
|    4. Redistribution. You may reproduce and distribute copies of the | ||||
|       Work or Derivative Works thereof in any medium, with or without | ||||
|       modifications, and in Source or Object form, provided that You | ||||
|       meet the following conditions: | ||||
|  | ||||
|       (a) You must give any other recipients of the Work or | ||||
|           Derivative Works a copy of this License; and | ||||
|  | ||||
|       (b) You must cause any modified files to carry prominent notices | ||||
|           stating that You changed the files; and | ||||
|  | ||||
|       (c) You must retain, in the Source form of any Derivative Works | ||||
|           that You distribute, all copyright, patent, trademark, and | ||||
|           attribution notices from the Source form of the Work, | ||||
|           excluding those notices that do not pertain to any part of | ||||
|           the Derivative Works; and | ||||
|  | ||||
|       (d) If the Work includes a "NOTICE" text file as part of its | ||||
|           distribution, then any Derivative Works that You distribute must | ||||
|           include a readable copy of the attribution notices contained | ||||
|           within such NOTICE file, excluding those notices that do not | ||||
|           pertain to any part of the Derivative Works, in at least one | ||||
|           of the following places: within a NOTICE text file distributed | ||||
|           as part of the Derivative Works; within the Source form or | ||||
|           documentation, if provided along with the Derivative Works; or, | ||||
|           within a display generated by the Derivative Works, if and | ||||
|           wherever such third-party notices normally appear. The contents | ||||
|           of the NOTICE file are for informational purposes only and | ||||
|           do not modify the License. You may add Your own attribution | ||||
|           notices within Derivative Works that You distribute, alongside | ||||
|           or as an addendum to the NOTICE text from the Work, provided | ||||
|           that such additional attribution notices cannot be construed | ||||
|           as modifying the License. | ||||
|  | ||||
|       You may add Your own copyright statement to Your modifications and | ||||
|       may provide additional or different license terms and conditions | ||||
|       for use, reproduction, or distribution of Your modifications, or | ||||
|       for any such Derivative Works as a whole, provided Your use, | ||||
|       reproduction, and distribution of the Work otherwise complies with | ||||
|       the conditions stated in this License. | ||||
|  | ||||
|    5. Submission of Contributions. Unless You explicitly state otherwise, | ||||
|       any Contribution intentionally submitted for inclusion in the Work | ||||
|       by You to the Licensor shall be under the terms and conditions of | ||||
|       this License, without any additional terms or conditions. | ||||
|       Notwithstanding the above, nothing herein shall supersede or modify | ||||
|       the terms of any separate license agreement you may have executed | ||||
|       with Licensor regarding such Contributions. | ||||
|  | ||||
|    6. Trademarks. This License does not grant permission to use the trade | ||||
|       names, trademarks, service marks, or product names of the Licensor, | ||||
|       except as required for reasonable and customary use in describing the | ||||
|       origin of the Work and reproducing the content of the NOTICE file. | ||||
|  | ||||
|    7. Disclaimer of Warranty. Unless required by applicable law or | ||||
|       agreed to in writing, Licensor provides the Work (and each | ||||
|       Contributor provides its Contributions) on an "AS IS" BASIS, | ||||
|       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||||
|       implied, including, without limitation, any warranties or conditions | ||||
|       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | ||||
|       PARTICULAR PURPOSE. You are solely responsible for determining the | ||||
|       appropriateness of using or redistributing the Work and assume any | ||||
|       risks associated with Your exercise of permissions under this License. | ||||
|  | ||||
|    8. Limitation of Liability. In no event and under no legal theory, | ||||
|       whether in tort (including negligence), contract, or otherwise, | ||||
|       unless required by applicable law (such as deliberate and grossly | ||||
|       negligent acts) or agreed to in writing, shall any Contributor be | ||||
|       liable to You for damages, including any direct, indirect, special, | ||||
|       incidental, or consequential damages of any character arising as a | ||||
|       result of this License or out of the use or inability to use the | ||||
|       Work (including but not limited to damages for loss of goodwill, | ||||
|       work stoppage, computer failure or malfunction, or any and all | ||||
|       other commercial damages or losses), even if such Contributor | ||||
|       has been advised of the possibility of such damages. | ||||
|  | ||||
|    9. Accepting Warranty or Additional Liability. While redistributing | ||||
|       the Work or Derivative Works thereof, You may choose to offer, | ||||
|       and charge a fee for, acceptance of support, warranty, indemnity, | ||||
|       or other liability obligations and/or rights consistent with this | ||||
|       License. However, in accepting such obligations, You may act only | ||||
|       on Your own behalf and on Your sole responsibility, not on behalf | ||||
|       of any other Contributor, and only if You agree to indemnify, | ||||
|       defend, and hold each Contributor harmless for any liability | ||||
|       incurred by, or claims asserted against, such Contributor by reason | ||||
|       of your accepting any such warranty or additional liability. | ||||
|  | ||||
|    END OF TERMS AND CONDITIONS | ||||
|  | ||||
|    APPENDIX: How to apply the Apache License to your work. | ||||
|  | ||||
|       To apply the Apache License to your work, attach the following | ||||
|       boilerplate notice, with the fields enclosed by brackets "[]" | ||||
|       replaced with your own identifying information. (Don't include | ||||
|       the brackets!)  The text should be enclosed in the appropriate | ||||
|       comment syntax for the file format. We also recommend that a | ||||
|       file or class name and description of purpose be included on the | ||||
|       same "printed page" as the copyright notice for easier | ||||
|       identification within third-party archives. | ||||
|  | ||||
|    Copyright [yyyy] [name of copyright owner] | ||||
|  | ||||
|    Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|    you may not use this file except in compliance with the License. | ||||
|    You may obtain a copy of the License at | ||||
|  | ||||
|        http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
|    Unless required by applicable law or agreed to in writing, software | ||||
|    distributed under the License is distributed on an "AS IS" BASIS, | ||||
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|    See the License for the specific language governing permissions and | ||||
|    limitations under the License. | ||||
README.md (134 lines)
| @ -1,4 +1,4 @@ | ||||
|  | ||||
| <p align="center"><img width="40%" src="docs/source/_static/img/pytorch-logo-dark.png" /></p> | ||||
|  | ||||
| -------------------------------------------------------------------------------- | ||||
|  | ||||
| @ -15,8 +15,6 @@ We are in an early-release beta. Expect some adventures and rough edges. | ||||
|   - [Binaries](#binaries) | ||||
|   - [From Source](#from-source) | ||||
|   - [Docker Image](#docker-image) | ||||
|   - [Building the Documentation](#building-the-documentation) | ||||
|   - [Previous Versions](#previous-versions) | ||||
| - [Getting Started](#getting-started) | ||||
| - [Communication](#communication) | ||||
| - [Releases and Contributing](#releases-and-contributing) | ||||
| @ -24,25 +22,41 @@ We are in an early-release beta. Expect some adventures and rough edges. | ||||
|  | ||||
| | System | 2.7 | 3.5 | | ||||
| | --- | --- | --- | | ||||
| | Linux CPU | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | | ||||
| | Linux GPU | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | [](https://ci.pytorch.org/jenkins/job/pytorch-master/) | | ||||
| | Windows GPU | <center>—</center> | [](https://ci.pytorch.org/jenkins/job/pytorch-builds/job/pytorch-win-ws2016-cuda9-cudnn7-py3-trigger/) | ||||
|  | ||||
| See also the [ci.pytorch.org HUD](https://ezyang.github.io/pytorch-ci-hud/build/pytorch-master). | ||||
| | Linux CPU | [](https://travis-ci.org/pytorch/pytorch) | [](https://travis-ci.org/pytorch/pytorch) | | ||||
| | Linux GPU | [](https://build.pytorch.org/job/pytorch-master-py2-linux) | [](https://build.pytorch.org/job/pytorch-master-py3-linux) | | ||||
| | macOS CPU | [](https://build.pytorch.org/job/pytorch-master-py2-osx-cpu) | [](https://build.pytorch.org/job/pytorch-master-py3-osx-cpu) | | ||||
|  | ||||
|  | ||||
| ## More about PyTorch | ||||
|  | ||||
| At a granular level, PyTorch is a library that consists of the following components: | ||||
|  | ||||
| | Component | Description | | ||||
| | ---- | --- | | ||||
| | **torch** | a Tensor library like NumPy, with strong GPU support | | ||||
| | **torch.autograd** | a tape-based automatic differentiation library that supports all differentiable Tensor operations in torch | | ||||
| | **torch.nn** | a neural networks library deeply integrated with autograd designed for maximum flexibility | | ||||
| | **torch.multiprocessing** | Python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and Hogwild training | | ||||
| | **torch.utils** | DataLoader, Trainer and other utility functions for convenience | | ||||
| | **torch.legacy(.nn/.optim)** | legacy code that has been ported over from torch for backward compatibility reasons | | ||||
| <table> | ||||
| <tr> | ||||
|     <td><b> torch </b></td> | ||||
|     <td> a Tensor library like NumPy, with strong GPU support </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td><b> torch.autograd </b></td> | ||||
|     <td> a tape-based automatic differentiation library that supports all differentiable Tensor operations in torch </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td><b> torch.nn </b></td> | ||||
|     <td> a neural networks library deeply integrated with autograd designed for maximum flexibility </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td><b> torch.multiprocessing  </b></td> | ||||
|     <td> Python multiprocessing, but with magical memory sharing of torch Tensors across processes. Useful for data loading and Hogwild training. </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td><b> torch.utils </b></td> | ||||
|     <td> DataLoader, Trainer and other utility functions for convenience </td> | ||||
| </tr> | ||||
| <tr> | ||||
|     <td><b> torch.legacy(.nn/.optim) </b></td> | ||||
|     <td> legacy code that has been ported over from torch for backward compatibility reasons </td> | ||||
| </tr> | ||||
| </table> | ||||
|  | ||||
| Usually one uses PyTorch either as: | ||||
|  | ||||
| @ -55,10 +69,10 @@ Elaborating further: | ||||
|  | ||||
| If you use NumPy, then you have used Tensors (a.k.a. ndarray). | ||||
|  | ||||
|  | ||||
| <p align=center><img width="30%" src="docs/source/_static/img/tensor_illustration.png" /></p> | ||||
|  | ||||
| PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerates the | ||||
| computation by a huge amount. | ||||
| PyTorch provides Tensors that can live either on the CPU or the GPU, and accelerate | ||||
| compute by a huge amount. | ||||
|  | ||||
| We provide a wide variety of tensor routines to accelerate and fit your scientific computation needs, | ||||
| such as slicing, indexing, math operations, linear algebra, and reductions. | ||||
| @ -82,7 +96,7 @@ from several research papers on this topic, as well as current and past work suc | ||||
| While this technique is not unique to PyTorch, it's one of the fastest implementations of it to date. | ||||
| You get the best of speed and flexibility for your crazy research. | ||||
|  | ||||
|  | ||||
| <p align=center><img width="80%" src="docs/source/_static/img/dynamic_graph.gif" /></p> | ||||
|  | ||||
| ### Python First | ||||
|  | ||||
| @ -106,7 +120,8 @@ We hope you never spend hours debugging your code because of bad stack traces or | ||||
| PyTorch has minimal framework overhead. We integrate acceleration libraries | ||||
| such as Intel MKL and NVIDIA (cuDNN, NCCL) to maximize speed. | ||||
| At the core, its CPU and GPU Tensor and neural network backends | ||||
| (TH, THC, THNN, THCUNN) are mature and have been tested for years. | ||||
| (TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API. | ||||
| They are mature and have been tested for years. | ||||
|  | ||||
| Hence, PyTorch is quite fast – whether you run small or large neural networks. | ||||
|  | ||||
| @ -123,8 +138,9 @@ and with minimal abstractions. | ||||
| You can write new neural network layers in Python using the torch API | ||||
| [or your favorite NumPy-based libraries such as SciPy](http://pytorch.org/tutorials/advanced/numpy_extensions_tutorial.html). | ||||
|  | ||||
| If you want to write your layers in C/C++, we provide a convenient extension API that is efficient and with minimal boilerplate. | ||||
| There is no wrapper code that needs to be written. You can see [a tutorial here](http://pytorch.org/tutorials/advanced/cpp_extension.html) and [an example here](https://github.com/pytorch/extension-cpp). | ||||
| If you want to write your layers in C/C++, we provide an extension API based on | ||||
| [cffi](http://cffi.readthedocs.io/en/latest/) that is efficient and with minimal boilerplate. | ||||
| There is no wrapper code that needs to be written. You can see [a tutorial here](http://pytorch.org/tutorials/advanced/c_extension.html) and [an example here](https://github.com/pytorch/extension-ffi). | ||||
|  | ||||
|  | ||||
| ## Installation | ||||
| @ -146,11 +162,6 @@ If you want to compile with CUDA support, install | ||||
| - [NVIDIA cuDNN](https://developer.nvidia.com/cudnn) v6.x or above | ||||
|  | ||||
| If you want to disable CUDA support, export environment variable `NO_CUDA=1`. | ||||
| Other potentially useful environment variables may be found in `setup.py`. | ||||
|  | ||||
| If you want to build on Windows, the Visual Studio 2017 14.11 toolset and NVTX are also needed. | ||||
| In particular, a CUDA 8 build on Windows additionally requires VS 2015 Update 3 and a patch for it. | ||||
| The details of the patch can be found [here](https://support.microsoft.com/en-gb/help/4020481/fix-link-exe-crashes-with-a-fatal-lnk1000-error-when-you-use-wholearch). | ||||
|  | ||||
| #### Install optional dependencies | ||||
|  | ||||
| @ -159,27 +170,20 @@ On Linux | ||||
| export CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" # [anaconda root directory] | ||||
|  | ||||
| # Install basic dependencies | ||||
| conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing | ||||
| conda install -c mingfeima mkldnn | ||||
| conda install numpy pyyaml mkl setuptools cmake cffi | ||||
|  | ||||
| # Add LAPACK support for the GPU | ||||
| conda install -c pytorch magma-cuda80 # or magma-cuda90 if CUDA 9 | ||||
| conda install -c soumith magma-cuda80 # or magma-cuda75 if CUDA 7.5 | ||||
| ``` | ||||
|  | ||||
| On macOS | ||||
| On OSX | ||||
| ```bash | ||||
| export CMAKE_PREFIX_PATH=[anaconda root directory] | ||||
| conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing | ||||
| ``` | ||||
|  | ||||
| On Windows | ||||
| ```cmd | ||||
| conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing | ||||
| conda install numpy pyyaml setuptools cmake cffi | ||||
| ``` | ||||
| #### Get the PyTorch source | ||||
| ```bash | ||||
| git clone --recursive https://github.com/pytorch/pytorch | ||||
| cd pytorch | ||||
| ``` | ||||
|  | ||||
| #### Install PyTorch | ||||
| @ -188,35 +192,22 @@ On Linux | ||||
| python setup.py install | ||||
| ``` | ||||
|  | ||||
| On macOS | ||||
| On OSX | ||||
| ```bash | ||||
| MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install | ||||
| ``` | ||||
|  | ||||
| On Windows | ||||
| ```cmd | ||||
| set "VS150COMNTOOLS=C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build" | ||||
| set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 | ||||
| set DISTUTILS_USE_SDK=1 | ||||
| REM The following two lines are needed for Python 2.7, but the support for it is very experimental. | ||||
| set MSSdk=1 | ||||
| set FORCE_PY27_BUILD=1 | ||||
| REM As for CUDA 8, VS2015 Update 3 is also required to build PyTorch. Use the following line. | ||||
| set "CUDA_HOST_COMPILER=%VS140COMNTOOLS%\..\..\VC\bin\amd64\cl.exe" | ||||
|  | ||||
| call "%VS150COMNTOOLS%\vcvarsall.bat" x64 -vcvars_ver=14.11 | ||||
| python setup.py install | ||||
| ``` | ||||
|  | ||||
| ### Docker image | ||||
|  | ||||
| A Dockerfile is supplied to build images with CUDA support and cuDNN v7. You can pass the -e PYTHON_VERSION=x.y flag to specify which Python version Miniconda should use, or leave it unset to use the default. Build as usual | ||||
| Dockerfile is supplied to build images with cuda support and cudnn v6. Build as usual | ||||
| ``` | ||||
| docker build -t pytorch -f docker/pytorch/Dockerfile . | ||||
| docker build -t pytorch . | ||||
| ``` | ||||
|  | ||||
| You can also pull a pre-built docker image from Docker Hub and run with nvidia-docker, | ||||
| but this is not currently maintained and will pull PyTorch 0.2. | ||||
| Dockerfile to build with cuda 9 and cudnn v7 (with Volta support) is in tools/docker, the build command is | ||||
| ``` | ||||
| docker build -t pytorch_cuda9 -f tools/docker/Dockerfile9 . | ||||
| ``` | ||||
| Alternatively, if you want to use a runtime image, you can use the pre-built one from Docker Hub and run with nvidia-docker: | ||||
| ``` | ||||
| nvidia-docker run --rm -ti --ipc=host pytorch/pytorch:latest | ||||
| ``` | ||||
| @ -224,41 +215,24 @@ Please note that PyTorch uses shared memory to share data between processes, so | ||||
| for multithreaded data loaders) the default shared memory segment size that the container runs with is not enough, and you | ||||
| should increase the shared memory size either with the `--ipc=host` or `--shm-size` command-line options to `nvidia-docker run`. | ||||
|  | ||||
| ### Building the Documentation | ||||
|  | ||||
| To build documentation in various formats, you will need Sphinx and the | ||||
| readthedocs theme. | ||||
|  | ||||
| ``` | ||||
| cd docs/ | ||||
| pip install -r requirements.txt | ||||
| ``` | ||||
| You can then build the documentation by running ``make <format>`` from the | ||||
| ``docs/`` folder. Run ``make`` to get a list of all available output formats. | ||||
|  | ||||
| ### Previous Versions | ||||
|  | ||||
| Installation instructions and binaries for previous PyTorch versions may be found | ||||
| on [our website](http://pytorch.org/previous-versions/). | ||||
|  | ||||
|  | ||||
| ## Getting Started | ||||
|  | ||||
| Three pointers to get you started: | ||||
| - [Tutorials: get you started with understanding and using PyTorch](https://pytorch.org/tutorials/) | ||||
| - [Tutorials: get you started with understanding and using PyTorch](http://pytorch.org/tutorials/) | ||||
| - [Examples: easy to understand pytorch code across all domains](https://github.com/pytorch/examples) | ||||
| - [The API Reference](http://pytorch.org/docs/) | ||||
|  | ||||
| ## Communication | ||||
| * forums: discuss implementations, research, etc. http://discuss.pytorch.org | ||||
| * GitHub issues: bug reports, feature requests, install issues, RFCs, thoughts, etc. | ||||
| * Slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . Our slack channel is invite-only to promote a healthy balance between power-users and beginners. If you need a slack invite, ping us at slack@pytorch.org | ||||
| * Slack: general chat, online discussions, collaboration etc. https://pytorch.slack.com/ . Our slack channel is invite-only to promote a healthy balance between power-users and beginners. If you need a slack invite, ping us at soumith@pytorch.org | ||||
| * newsletter: no-noise, one-way email newsletter with important announcements about pytorch. You can sign-up here: http://eepurl.com/cbG0rv | ||||
|  | ||||
| ## Releases and Contributing | ||||
|  | ||||
| PyTorch has a 90-day release cycle (major releases). | ||||
| Its current state is Beta; we expect no obvious bugs. Please let us know if you encounter a bug by [filing an issue](https://github.com/pytorch/pytorch/issues). | ||||
| It's current state is Beta, we expect no obvious bugs. Please let us know if you encounter a bug by [filing an issue](https://github.com/pytorch/pytorch/issues). | ||||
|  | ||||
| We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. | ||||
|  | ||||
|  | ||||
| @ -1,3 +0,0 @@ | ||||
| [flake8] | ||||
| max-line-length = 120 | ||||
|  | ||||

aten/.gitignore (3 changed lines, vendored)
| @ -1,3 +0,0 @@ | ||||
| __pycache__/ | ||||
| build/ | ||||
| *.pyc | ||||
| @ -1,105 +0,0 @@ | ||||
| if (BUILD_ATEN_MOBILE) | ||||
|   return() | ||||
| endif() | ||||
|  | ||||
| # Find modules | ||||
| list(APPEND CMAKE_MODULE_PATH | ||||
|   /usr/lib/x86_64-linux-gnu/ | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/public | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules_CUDA_fix) | ||||
| list(APPEND CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/) | ||||
|  | ||||
| cmake_policy(SET CMP0012 NEW) | ||||
|  | ||||
| ############################################# | ||||
|  | ||||
| set(ATen_CPU_SRCS) | ||||
| set(ATen_CPU_TEST_SRCS) | ||||
| set(ATen_CPU_INCLUDE) | ||||
| set(ATen_THIRD_PARTY_INCLUDE) | ||||
| set(ATen_CUDA_SRCS) | ||||
| set(ATen_CUDA_TEST_SRCS) | ||||
| set(ATen_CUDA_INCLUDE) | ||||
| set(ATen_CPU_DEPENDENCY_LIBS) | ||||
| set(ATen_CUDA_DEPENDENCY_LIBS) | ||||
| set(ATen_PUBLIC_CUDA_DEPENDENCY_LIBS) | ||||
| SET(ATEN_INSTALL_BIN_SUBDIR "bin" CACHE PATH "ATen install binary subdirectory") | ||||
| SET(ATEN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "ATen install library subdirectory") | ||||
| SET(ATEN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "ATen install include subdirectory") | ||||
|  | ||||
| if(USE_CUDA) | ||||
|   list(APPEND ATen_CUDA_INCLUDE ${CUDA_INCLUDE_DIRS}) | ||||
| endif() | ||||
|  | ||||
| set(TH_LINK_STYLE STATIC) | ||||
| add_subdirectory(src/TH) | ||||
| set(TH_CPU_INCLUDE | ||||
|   # dense | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/src/TH | ||||
|   ${CMAKE_CURRENT_BINARY_DIR}/src/TH | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/src | ||||
|   ${CMAKE_CURRENT_BINARY_DIR}/src | ||||
|   ${CMAKE_BINARY_DIR}/aten/src) | ||||
| list(APPEND ATen_CPU_INCLUDE ${TH_CPU_INCLUDE}) | ||||
|  | ||||
| if(USE_CUDA OR USE_ROCM) | ||||
|   set(TH_CUDA_INCLUDE | ||||
|     # dense | ||||
|     ${CMAKE_CURRENT_SOURCE_DIR}/src/THC | ||||
|     ${CMAKE_CURRENT_BINARY_DIR}/src/THC) | ||||
|   list(APPEND ATen_CUDA_INCLUDE ${TH_CUDA_INCLUDE}) | ||||
| endif() | ||||
|  | ||||
| add_subdirectory(src/THNN) | ||||
|  | ||||
| # Find the HIP package, set the HIP paths, load the HIP CMake. | ||||
| IF(USE_ROCM) | ||||
|   include(LoadHIP) | ||||
|   if (NOT PYTORCH_FOUND_HIP) | ||||
|     MESSAGE(FATAL_ERROR | ||||
|       "Could not find HIP installation") | ||||
|   endif() | ||||
| ENDIF() | ||||
|  | ||||
| IF(MSVC) | ||||
|   # we want to respect the standard, and we are bored of those **** . | ||||
|   ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1) | ||||
|   LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler /wd4819 -Xcompiler /wd4503 -Xcompiler /wd4190 -Xcompiler /wd4244 -Xcompiler /wd4251 -Xcompiler /wd4275 -Xcompiler /wd4522") | ||||
| ENDIF(MSVC) | ||||
|  | ||||
| if(USE_ROCM) | ||||
|   SET(AT_CUDA_ENABLED 1) | ||||
|   add_subdirectory(src/THC) | ||||
|   add_subdirectory(src/THCUNN) | ||||
|   message("ROCm is enabled.") | ||||
| elseif(USE_CUDA) | ||||
|   SET(AT_CUDA_ENABLED 1) | ||||
|   add_subdirectory(src/THC) | ||||
|   add_subdirectory(src/THCUNN) | ||||
| else() | ||||
|   message("disabling CUDA because USE_CUDA is set to false") | ||||
|   SET(AT_CUDA_ENABLED 0) | ||||
| endif() | ||||
|  | ||||
| list(APPEND ATen_CPU_INCLUDE | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/src/THNN | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/src/THCUNN) | ||||
|  | ||||
| list(APPEND ATen_CPU_INCLUDE | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/src | ||||
|   ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/catch/single_include | ||||
|   ${CMAKE_CURRENT_BINARY_DIR}/src/ATen) | ||||
| add_subdirectory(src/ATen) | ||||
|  | ||||
| # Pass source, includes, and libs to parent | ||||
| set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE) | ||||
| set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE) | ||||
| set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE) | ||||
| set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) | ||||
| set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE) | ||||
| set(ATen_CORE_TEST_SRCS ${ATen_CORE_TEST_SRCS} PARENT_SCOPE) | ||||

aten/README.md (258 changed lines)
| @ -1,258 +0,0 @@ | ||||
| # ATen: A TENsor library | ||||
|  | ||||
| ATen is a simple tensor library that exposes the Tensor operations in Torch | ||||
| and PyTorch directly in C++11. The wrapper respects the semantics of operators | ||||
| in PyTorch, except for minor details due to differences between C++ and Python in | ||||
| the way default arguments are handled. See the [documentation for tensors](http://pytorch.org/docs/tensors.html) in PyTorch for what these operations do. | ||||
| ATen's API is auto-generated from the same declarations PyTorch uses so the | ||||
| two APIs will track each other over time. | ||||
|  | ||||
| Tensor types are resolved dynamically, such that the API is generic and | ||||
| does not include templates. That is, there is one `Tensor` type. It can hold a | ||||
| CPU or CUDA Tensor, and the tensor may hold Doubles, Floats, Ints, etc. This design | ||||
| makes it easy to write generic code without templating everything. | ||||
|  | ||||
| See https://pytorch.org/cppdocs for the provided API. Excerpt: | ||||
| ```c++ | ||||
| Tensor atan2(const Tensor & other) const; | ||||
| Tensor & atan2_(const Tensor & other); | ||||
| Tensor pow(Scalar exponent) const; | ||||
| Tensor pow(const Tensor & exponent) const; | ||||
| Tensor & pow_(Scalar exponent); | ||||
| Tensor & pow_(const Tensor & exponent); | ||||
| Tensor lerp(const Tensor & end, Scalar weight) const; | ||||
| Tensor & lerp_(const Tensor & end, Scalar weight); | ||||
| Tensor histc() const; | ||||
| Tensor histc(int64_t bins) const; | ||||
| Tensor histc(int64_t bins, Scalar min) const; | ||||
| Tensor histc(int64_t bins, Scalar min, Scalar max) const; | ||||
| ``` | ||||
|  | ||||
| Inplace operations are also provided, and always suffixed by `_` to indicate they will modify the Tensor. | ||||
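|  | ||||
| For example, a minimal sketch (reusing the `add` calls shown in the examples below; the variable names are illustrative): | ||||
|  | ||||
| ```c++ | ||||
| Tensor a = CPU(kFloat).ones({2, 2}); | ||||
| Tensor b = CPU(kFloat).ones({2, 2}); | ||||
| Tensor c = a.add(b); // out-of-place: returns a new Tensor, a is unchanged | ||||
| a.add_(b);           // in-place: modifies a directly | ||||
| ``` | ||||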
|  | ||||
| ### Installation | ||||
|  | ||||
| TH/THC/THNN/THCUNN are provided (as git subtrees), so the repo is standalone. You will need a C++11 compiler, cmake, and the pyyaml python package. | ||||
| ``` | ||||
|  | ||||
| # Install pyyaml used by python code generation to read API declarations | ||||
|  | ||||
| # macOS: if you don't have pip | ||||
| sudo easy_install pip | ||||
| # Ubuntu: if you don't have pip | ||||
| apt-get -y install python-pip | ||||
|  | ||||
| # if you don't have pyyaml | ||||
| sudo pip install pyyaml | ||||
|  | ||||
| mkdir build | ||||
| cd build | ||||
| cmake .. -DCMAKE_INSTALL_PREFIX=/where/you/want # specify your dest directory | ||||
| # cmake .. -DUSE_NVRTC=ON -DUSE_TENSORRT=OFF -DCMAKE_INSTALL_PREFIX=../install -DCAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO=OFF -DUSE_CUDA=ON # for CUDA | ||||
| # cmake .. -DUSE_CUDA=OFF  # for CPU only machines | ||||
| make install | ||||
| ``` | ||||
|  | ||||
| ### Example usage | ||||
|  | ||||
| Here is a simple example; again, the syntax follows Torch semantics. | ||||
|  | ||||
| ```c++ | ||||
| using namespace at; // assumed in the following | ||||
|  | ||||
| Tensor d = CPU(kFloat).ones({3, 4}); | ||||
| Tensor r = CPU(kFloat).zeros({3,4}); | ||||
| for(auto i = 0; i < 100000; i++) { | ||||
|   r = r.add(d); | ||||
|   // equivalently | ||||
|   r = r + d; | ||||
|   // or | ||||
|   r += d; | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Want this running on the GPU? | ||||
| ```c++ | ||||
| using namespace at; // assumed in the following | ||||
|  | ||||
| Tensor d = CUDA(kFloat).ones({3, 4}); | ||||
| Tensor r = CUDA(kFloat).zeros({3,4}); | ||||
| for(auto i = 0; i < 100000; i++) { | ||||
|   r = r.add(d); | ||||
|   // equivalently | ||||
|   r = r + d; | ||||
|   // or | ||||
|   r += d; | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Expressions like `CUDA(kFloat)` are first-class `at::Type` objects that represent | ||||
| the type of a Tensor and are used to create Tensors when their type cannot be | ||||
| inferred. | ||||
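|  | ||||
| Because a `Type` is an ordinary object, it can be passed around to write code that is generic over backends. A small sketch (the helper name is hypothetical): | ||||
|  | ||||
| ```c++ | ||||
| // Creates a 2x2 tensor of ones using whatever backend/dtype t describes. | ||||
| Tensor make_ones(const Type & t) { | ||||
|   return t.ones({2, 2}); | ||||
| } | ||||
|  | ||||
| Tensor cpu_ones  = make_ones(CPU(kFloat));  // lives on the CPU | ||||
| Tensor cuda_ones = make_ones(CUDA(kFloat)); // lives on the GPU | ||||
| ``` | ||||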
|  | ||||
| See more in [sample files](src/ATen/test). | ||||
|  | ||||
| ### Creating your kernel | ||||
|  | ||||
| It is easy to create new kernels, thanks to the `dispatch<>()` templated function. Example: | ||||
| ```c++ | ||||
|  | ||||
| // a simple sum kernel (for CPU only) | ||||
| template<typename T> | ||||
| struct sum_op { | ||||
|   // dispatch handles variable arguments for you | ||||
|   Tensor CPU(const Type & t, Tensor & x_) | ||||
|   { | ||||
|     Tensor x = x_.contiguous(); | ||||
|     auto x_p = x.data<T>(); | ||||
|     int64_t size = x.numel(); | ||||
|     T sum = 0; | ||||
|     for(int64_t i = 0; i < size; i++) { | ||||
|       sum += x_p[i]; | ||||
|     } | ||||
|     return sum; | ||||
|   }; | ||||
|   Tensor CUDA(Tensor& x) { | ||||
|     throw std::invalid_argument("device not supported"); | ||||
|   }; | ||||
| }; | ||||
|  | ||||
| Tensor a = CPU(kFloat).rand({3, 7}); | ||||
| std::cout << a << std::endl; | ||||
| std::cout << dispatch<sum_op>(a.type(),a) << " == " << a.sum() << std::endl; | ||||
| ``` | ||||
|  | ||||
| ### Efficient access to tensor elements | ||||
|  | ||||
| When using Tensor-wide operations, the relative cost of dynamic dispatch is very small. | ||||
| However, there are cases, especially in your own kernels, where efficient element-wise access is needed, | ||||
| and the cost of dynamic dispatch inside the element-wise loop is very high. | ||||
| ATen provides _accessors_ that are created with a single dynamic check that a Tensor has the expected type and number of | ||||
| dimensions. Accessors then expose an API for accessing the Tensor elements efficiently: | ||||
|  | ||||
| ```c++ | ||||
|  | ||||
| Tensor foo = CPU(kFloat).rand({12,12}); | ||||
|  | ||||
| // assert foo is 2-dimensional and holds floats. | ||||
| auto foo_a = foo.accessor<float,2>(); | ||||
| float trace = 0; | ||||
|  | ||||
| for(int i = 0; i < foo_a.size(0); i++) { | ||||
|   // use the accessor foo_a to get tensor data. | ||||
|   trace += foo_a[i][i]; | ||||
| } | ||||
| ``` | ||||
|  | ||||
| Accessors are temporary views of a Tensor. They are only valid for the lifetime of the tensor that they | ||||
| view and hence should only be used locally in a function, like iterators. | ||||
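|  | ||||
| A sketch of the intended pattern (the function name is made up for illustration): | ||||
|  | ||||
| ```c++ | ||||
| float diag_sum(Tensor t) { | ||||
|   // fine: the accessor is created and used while t is alive | ||||
|   // (assumes a square 2-D float tensor) | ||||
|   auto a = t.accessor<float, 2>(); | ||||
|   float s = 0; | ||||
|   for (int i = 0; i < a.size(0); i++) { | ||||
|     s += a[i][i]; | ||||
|   } | ||||
|   return s; | ||||
| } | ||||
| // By contrast, storing an accessor anywhere that outlives the tensor | ||||
| // it views (e.g. returning it from a function whose tensor is a local) | ||||
| // leaves it dangling. | ||||
| ``` | ||||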
|  | ||||
| ### Using externally created data | ||||
|  | ||||
| If you already have your tensor data allocated in memory (CPU or CUDA), | ||||
| you can view that memory as a Tensor in ATen: | ||||
|  | ||||
| ```c++ | ||||
| float data[] = { 1, 2, 3, | ||||
|                  4, 5, 6}; | ||||
| auto f = CPU(kFloat).tensorFromBlob(data, {2,3}); | ||||
| cout << f << endl; | ||||
| ``` | ||||
|  | ||||
| These tensors cannot be resized because ATen does not own the memory, but otherwise | ||||
| behave as normal tensors. | ||||
|  | ||||
| ### Scalars and zero-dimensional tensors | ||||
|  | ||||
| In addition to the `Tensor` objects, ATen also includes `Scalar`s that represent a single number. | ||||
| Like a Tensor, Scalars are dynamically typed and can hold any one of ATen's number types. | ||||
| Scalars can be implicitly constructed from C++ number types. Scalars are needed because some functions like `addmm` take numbers along with Tensors and expect these | ||||
| numbers to be the same dynamic type as the tensor. They are also used in the API to indicate places where | ||||
| a function will _always_ return a Scalar value, like `sum`. | ||||
|  | ||||
| ```c++ | ||||
| Tensor addmm(Scalar beta, const Tensor & self, | ||||
|              Scalar alpha, const Tensor & mat1, | ||||
|              const Tensor & mat2); | ||||
| Scalar sum(const Tensor & self); | ||||
|  | ||||
| //usage | ||||
| Tensor a = ... | ||||
| Tensor b = ... | ||||
| Tensor c = ... | ||||
| Tensor r = addmm(1.0, a, .5, b, c); | ||||
| ``` | ||||
|  | ||||
| In addition to Scalars, ATen also allows Tensor objects to be zero-dimensional. These Tensors hold | ||||
| a single value and can be references to a single element in a larger Tensor. They can be used anywhere a Tensor is expected. They are normally created by operators like `select`, which reduce the dimensions of | ||||
| a Tensor. | ||||
|  | ||||
| ```c++ | ||||
| Tensor two = CPU(kFloat).rand({10,20}); | ||||
| two[1][2] = 4; | ||||
| //~~~~~~~  zero-dimensional Tensor | ||||
| ``` | ||||
|  | ||||
| It is possible to convert between Scalar and zero-dim Tensors: | ||||
|  | ||||
| ```c++ | ||||
| Tensor zero_dim = CPU(kFloat).scalarTensor(4); | ||||
| Scalar from_tensor = Scalar(zero_dim); //only valid when zero_dim.dim() == 0; | ||||
| ``` | ||||
|  | ||||
| ### Avoiding unnecessary CUDA synchronization in your kernels when using Scalars | ||||
|  | ||||
| Moving a single number from the GPU to the CPU introduces a synchronization point | ||||
| that can add latency to your program. In certain cases, the result of a GPU operator like `sum`, which | ||||
| returns a Scalar, may be plugged into another GPU operator as an argument. If Scalars were always copied | ||||
| to the CPU, this would result in two copies. To avoid these synchronizations, Scalar objects can be | ||||
| optionally backed by a zero-dim Tensor, and are only copied to the CPU when requested. | ||||
|  | ||||
| ```c++ | ||||
| auto a = CUDA(kFloat).rand({3,4}); | ||||
| Scalar on_gpu = Scalar(a[1][1]); //backed by zero-dim Tensor | ||||
| assert(on_gpu.isBackedByTensor()); | ||||
|  | ||||
| double value = on_gpu.toDouble(); // copied to CPU, if it was backed by GPU Tensor. | ||||
| Scalar svalue = on_gpu.local(); // force the Scalar to become local to CPU. | ||||
|  | ||||
| // get the scalar as a zero-dim tensor. If it was already backed | ||||
| // by a zero-dim Tensor then this op has no synchronization. | ||||
| // if the Scalar was local on CPU, it performs the copy | ||||
| Tensor same_tensor = CUDA(kFloat).scalarTensor(on_gpu); | ||||
| ``` | ||||
|  | ||||
| Operators aware of the location of Scalars can arrange to do the minimal number of copies required. | ||||
|  | ||||
| ### Developer notes | ||||
|  | ||||
| ATen relies heavily on code generation to automatically generate headers | ||||
| and implementations for all of the tensor methods it supports.  The main | ||||
| entry point for the script which does all this work is | ||||
| [`src/ATen/gen.py`](src/ATen/gen.py), which ingests | ||||
| [`src/ATen/Declarations.cwrap`](src/ATen/Declarations.cwrap), | ||||
| [`src/ATen/nn.yaml`](src/ATen/nn.yaml), | ||||
| [`src/ATen/native/native_functions.yaml`](src/ATen/native/native_functions.yaml) and the THNN/THCUNN headers and | ||||
| produces all of the headers and wrapping code necessary to generate | ||||
| the ATen interface. | ||||
|  | ||||
| If you need to understand how ATen understands a declaration after all | ||||
| of this processing occurs, it's helpful to look at the generated file | ||||
| `Declarations.yaml` (NB: not cwrap) which contains information for all | ||||
| ATen methods in a uniform manner.  This file is utilized by PyTorch, | ||||
| which further extends the ATen interface with support for automatic | ||||
| differentiation. | ||||
|  | ||||
| #### Note [ATen preprocessor philosophy] | ||||
|  | ||||
| ATen is designed to be simple to use, and one of the things this implies is | ||||
| that it should not be necessary to use preprocessor macros when using ATen; | ||||
| we would rather provide all symbols, even for functionality that is not | ||||
| available on the system ATen is running on. | ||||
|  | ||||
| This means that internally inside ATen, whereas other libraries might | ||||
| simply omit source files for, e.g., CuDNN, when CuDNN libraries are not | ||||
| installed, ATen will always build these source files, compiling stub | ||||
| functions for anything that is not available.  ATen never uses | ||||
| `AT_ENABLED_CUDA()` in header files, and all types in ATen's public API | ||||
| are always available no matter your build configuration. | ||||
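|  | ||||
| A minimal sketch of the stub pattern described above (illustrative only, not actual ATen source; the operator name is made up): | ||||
|  | ||||
| ```c++ | ||||
| // some_op_cuda.cpp -- compiled even in CPU-only builds, so the symbol | ||||
| // always exists and callers never need preprocessor guards themselves. | ||||
| #include <stdexcept> | ||||
| #include "ATen/ATen.h" | ||||
|  | ||||
| #if !AT_ENABLED_CUDA() | ||||
| at::Tensor some_op_cuda(const at::Tensor & self) { | ||||
|   throw std::runtime_error("some_op_cuda: ATen was built without CUDA"); | ||||
| } | ||||
| #endif // in CUDA builds, the real definition is compiled instead | ||||
| ``` | ||||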
| @ -1,21 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -e | ||||
|  | ||||
| if [ -z "$PREFIX" ]; then | ||||
|   PREFIX="$CONDA_PREFIX" | ||||
| fi | ||||
|  | ||||
| # When conda-build constructs a new working copy to perform a build | ||||
| # in, it recursively copies *all* files and directories in the original | ||||
| # source directory, including any pre-existing build products (e.g., | ||||
| # if you previously ran cmake.)  This is problematic, because if | ||||
| # a 'build' directory already exists, cmake will reuse build settings | ||||
| # rather than recompute them from scratch.  We want a fresh build, so | ||||
| # we prophylactically remove the build directory. | ||||
| rm -rf build || true | ||||
|  | ||||
| mkdir -p build | ||||
| cd build | ||||
| cmake -DCMAKE_INSTALL_PREFIX="$PREFIX" -DCMAKE_PREFIX_PATH="$PREFIX" -DCMAKE_BUILD_TYPE=Release $CONDA_CMAKE_ARGS .. | ||||
| make install -j20 | ||||
| @ -1,33 +0,0 @@ | ||||
| {% set version = "0.1.dev" %} | ||||
|  | ||||
| package: | ||||
|   name: aten | ||||
|   version: {{ version }} | ||||
|  | ||||
| source: | ||||
|   path: .. | ||||
|  | ||||
| build: | ||||
|   number: 1 | ||||
|   skip: True  # [win] | ||||
|   script_env: | ||||
|     - CONDA_CMAKE_ARGS | ||||
|  | ||||
| requirements: | ||||
|   build: | ||||
|     - cmake | ||||
|     - pyyaml | ||||
|     - setuptools | ||||
|     - python | ||||
|     - mkl # [not osx] | ||||
|   run: | ||||
|     - mkl # [not osx] | ||||
|  | ||||
| about: | ||||
|   home: https://github.com/zdevito/ATen | ||||
|   license: BSD | ||||
|   summary: A TENsor library for C++11 | ||||
|  | ||||
| extra: | ||||
|   recipe-maintainers: | ||||
|     - ezyang | ||||

aten/src/ATen/.gitignore (1 changed line, vendored)
| @ -1 +0,0 @@ | ||||
| Config.h | ||||
| @ -1,26 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ATen/core/ATenGeneral.h" | ||||
| #include "ATen/Allocator.h" | ||||
| #include "ATen/CPUGeneral.h" | ||||
| #include "ATen/CUDAGuard.h" | ||||
| #include "ATen/Context.h" | ||||
| #include "ATen/Device.h" | ||||
| #include "ATen/DeviceGuard.h" | ||||
| #include "ATen/DimVector.h" | ||||
| #include "ATen/Dispatch.h" | ||||
| #include "ATen/Formatting.h" | ||||
| #include "ATen/Functions.h" | ||||
| #include "ATen/core/Generator.h" | ||||
| #include "ATen/core/Layout.h" | ||||
| #include "ATen/OptionsGuard.h" | ||||
| #include "ATen/core/Scalar.h" | ||||
| #include "ATen/ScalarOps.h" | ||||
| #include "ATen/core/Storage.h" | ||||
| #include "ATen/Tensor.h" | ||||
| #include "ATen/TensorGeometry.h" | ||||
| #include "ATen/core/TensorMethods.h" | ||||
| #include "ATen/TensorOperators.h" | ||||
| #include "ATen/core/TensorOptions.h" | ||||
| #include "ATen/Type.h" | ||||
| #include "ATen/core/Error.h" | ||||
| @ -1,9 +0,0 @@ | ||||
| # Find the ATen includes and library | ||||
| # | ||||
| # ATEN_INCLUDE_DIR -- where to find the includes | ||||
| # ATEN_LIBRARIES -- list of libraries to link against | ||||
| # ATEN_FOUND -- set to 1 if found | ||||
|  | ||||
| SET(ATEN_FOUND 1) | ||||
| SET(ATEN_INCLUDE_DIR "@ATEN_INCLUDE_DIR@") | ||||
| SET(ATEN_LIBRARIES "@ATEN_LIBRARIES@") | ||||
| @ -1,43 +0,0 @@ | ||||
| #pragma once | ||||
| #include "ATen/Config.h" | ||||
| #include "ATen/core/Half.h" | ||||
|  | ||||
| // Defines the accumulation type for a scalar type. | ||||
| // Example: | ||||
| //   using accscalar_t = acc_type<scalar_t, true>; | ||||
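| // Per the specializations below: acc_type<Half, true> is float, | ||||
| // acc_type<float, false> is double, and all integral types accumulate | ||||
| // in int64_t. The bool parameter selects CUDA (true) or CPU (false). | ||||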
|  | ||||
| #ifdef __CUDACC__ | ||||
| #include <cuda.h> | ||||
| #include <cuda_fp16.h> | ||||
| #endif | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| template <typename T, bool is_cuda> | ||||
| struct AccumulateType { }; | ||||
|  | ||||
| #ifdef __CUDACC__ | ||||
| template <> struct AccumulateType<half, true> { using type = float; }; | ||||
| #endif | ||||
| template <> struct AccumulateType<Half, true> { using type = float; }; | ||||
| template <> struct AccumulateType<float, true> { using type = float; }; | ||||
| template <> struct AccumulateType<double, true> { using type = double; }; | ||||
| template <> struct AccumulateType<int8_t, true> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<uint8_t, true> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<char, true> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<int16_t, true> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<int32_t, true> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<int64_t, true> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<float, false> { using type = double; }; | ||||
| template <> struct AccumulateType<double, false> { using type = double; }; | ||||
| template <> struct AccumulateType<int8_t, false> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<uint8_t, false> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<char, false> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<int16_t, false> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<int32_t, false> { using type = int64_t; }; | ||||
| template <> struct AccumulateType<int64_t, false> { using type = int64_t; }; | ||||
|  | ||||
| template<typename T, bool is_cuda> | ||||
| using acc_type = typename AccumulateType<T, is_cuda>::type; | ||||
|  | ||||
| }  // namespace at | ||||
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Allocator.h> | ||||
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/ArrayRef.h> | ||||
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Backend.h> | ||||
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Backtrace.h> | ||||
| @ -1,384 +0,0 @@ | ||||
| cmake_minimum_required(VERSION 3.0 FATAL_ERROR) | ||||
| SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) | ||||
|  | ||||
| IF(NOT MSVC) | ||||
|   SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-qualifiers") | ||||
|   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-ignored-qualifiers") | ||||
|   SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-absolute-value") | ||||
|   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-absolute-value") | ||||
| ENDIF(NOT MSVC) | ||||
|  | ||||
| # Can be compiled standalone | ||||
| IF(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DIR OR NOT AT_INSTALL_SHARE_DIR) | ||||
|   SET(AT_INSTALL_BIN_DIR "bin" CACHE PATH "AT install binary subdirectory") | ||||
|   SET(AT_INSTALL_LIB_DIR "lib" CACHE PATH "AT install library subdirectory") | ||||
|   SET(AT_INSTALL_INCLUDE_DIR "include" CACHE PATH "AT install include subdirectory") | ||||
|   SET(AT_INSTALL_SHARE_DIR "share" CACHE PATH "AT install share subdirectory") | ||||
| ENDIF() | ||||
|  | ||||
| CONFIGURE_FILE(Config.h.in "${CMAKE_CURRENT_SOURCE_DIR}/Config.h") | ||||
| CONFIGURE_FILE(cuda/CUDAConfig.h.in "${CMAKE_CURRENT_SOURCE_DIR}/cuda/CUDAConfig.h") | ||||
|  | ||||
| # NB: If you edit these globs, you'll have to update setup.py package_data as well | ||||
| FILE(GLOB base_h "*.h" "detail/*.h") | ||||
| FILE(GLOB base_cpp "*.cpp" "detail/*.cpp") | ||||
| add_subdirectory(core) | ||||
| FILE(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh") | ||||
| FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp") | ||||
| FILE(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu") | ||||
| FILE(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh") | ||||
| FILE(GLOB cudnn_cpp "cudnn/*.cpp") | ||||
| FILE(GLOB miopen_h "miopen/*.h") | ||||
| FILE(GLOB miopen_cpp "miopen/*.cpp") | ||||
| FILE(GLOB mkl_cpp "mkl/*.cpp") | ||||
| FILE(GLOB mkldnn_cpp "mkldnn/*.cpp") | ||||
|  | ||||
| FILE(GLOB native_cpp "native/*.cpp") | ||||
| FILE(GLOB native_sparse_cpp "native/sparse/*.cpp") | ||||
| FILE(GLOB native_sparse_cuda_cu "native/sparse/cuda/*.cu") | ||||
| FILE(GLOB native_sparse_cuda_cpp "native/sparse/cuda/*.cpp") | ||||
| FILE(GLOB native_cudnn_cpp "native/cudnn/*.cpp") | ||||
| FILE(GLOB native_miopen_cpp "native/miopen/*.cpp") | ||||
| FILE(GLOB native_cuda_cu "native/cuda/*.cu") | ||||
| FILE(GLOB native_cuda_cpp "native/cuda/*.cpp") | ||||
| FILE(GLOB native_mkl_cpp "native/mkl/*.cpp") | ||||
| FILE(GLOB native_mkldnn_cpp "native/mkldnn/*.cpp") | ||||
|  | ||||
| set(all_cpu_cpp ${base_cpp} ${ATen_CORE_SRCS} ${native_cpp} ${native_sparse_cpp} ${native_mkl_cpp} ${native_mkldnn_cpp} ${generated_cpp} ${ATen_CPU_SRCS} ${cpu_kernel_cpp}) | ||||
| if(AT_MKL_ENABLED) | ||||
|   set(all_cpu_cpp ${all_cpu_cpp} ${mkl_cpp}) | ||||
| endif() | ||||
| if(AT_MKLDNN_ENABLED) | ||||
|   set(all_cpu_cpp ${all_cpu_cpp} ${mkldnn_cpp}) | ||||
| endif() | ||||
|  | ||||
| IF(USE_CUDA OR USE_ROCM) | ||||
|   list(APPEND ATen_CUDA_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/cuda) | ||||
|   set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} ${cuda_cu} ${native_cuda_cu} ${native_sparse_cuda_cu}) | ||||
|   set(all_cuda_cpp ${native_sparse_cuda_cpp} ${cuda_cpp} ${native_cuda_cpp} ${cuda_generated_cpp} ${ATen_CUDA_SRCS}) | ||||
|   IF(USE_CUDA) | ||||
|     SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${all_cuda_cpp}) | ||||
|     IF(CUDNN_FOUND) | ||||
|       SET(all_cuda_cpp ${all_cuda_cpp} ${cudnn_cpp}) | ||||
|     ENDIF() | ||||
|   ELSEIF(USE_ROCM) | ||||
|     SET(all_cuda_cpp ${native_cudnn_cpp} ${native_miopen_cpp} ${miopen_cpp} ${all_cuda_cpp}) | ||||
|   ENDIF() | ||||
| endif() | ||||
|  | ||||
| filter_list(generated_h generated_cpp "\\.h$") | ||||
| filter_list(cuda_generated_h cuda_generated_cpp "\\.h$") | ||||
|  | ||||
| list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..) | ||||
| # so the build can find the generated header files | ||||
| list(APPEND ATen_CPU_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}) | ||||
| IF(NOT AT_LINK_STYLE) | ||||
|   SET(AT_LINK_STYLE SHARED) | ||||
| ENDIF() | ||||
|  | ||||
| IF(BLAS_FOUND) | ||||
|   IF ($ENV{TH_BINARY_BUILD}) | ||||
|     MESSAGE(STATUS "TH_BINARY_BUILD detected. Enabling special linkage.") | ||||
|     list(APPEND ATen_CPU_DEPENDENCY_LIBS | ||||
|       "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") | ||||
|     if(USE_CUDA OR USE_ROCM) | ||||
|       list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|         "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") | ||||
|     endif() | ||||
|   ELSE ($ENV{TH_BINARY_BUILD}) | ||||
|     list(APPEND ATen_CPU_DEPENDENCY_LIBS ${BLAS_LIBRARIES}) | ||||
|     if(USE_CUDA OR USE_ROCM) | ||||
|       list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${BLAS_LIBRARIES}") | ||||
|     endif() | ||||
|   ENDIF ($ENV{TH_BINARY_BUILD}) | ||||
| ENDIF(BLAS_FOUND) | ||||
|  | ||||
| IF(LAPACK_FOUND) | ||||
|   list(APPEND ATen_CPU_DEPENDENCY_LIBS ${LAPACK_LIBRARIES}) | ||||
|   if(USE_CUDA OR USE_ROCM) | ||||
|     # Although LAPACK provides CPU routines (and thus, one might expect that ATen_cuda | ||||
|     # would not need this at all), some of our libraries (magma in particular) | ||||
|     # call into CPU BLAS/LAPACK implementations, and so it is very important | ||||
|     # we get the *right* implementation, because even if the symbols are the | ||||
|     # same, LAPACK implementations may have different calling conventions. | ||||
|     # This caused https://github.com/pytorch/pytorch/issues/7353 | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${LAPACK_LIBRARIES}) | ||||
|   endif() | ||||
| ENDIF(LAPACK_FOUND) | ||||
|  | ||||
| IF (UNIX AND NOT APPLE) | ||||
|    INCLUDE(CheckLibraryExists) | ||||
|    # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830 | ||||
|    CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" NEED_LIBRT) | ||||
|    IF(NEED_LIBRT) | ||||
|      list(APPEND ATen_CPU_DEPENDENCY_LIBS rt) | ||||
|      SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt) | ||||
|    ENDIF(NEED_LIBRT) | ||||
| ENDIF(UNIX AND NOT APPLE) | ||||
|  | ||||
| IF(UNIX) | ||||
|   SET(CMAKE_EXTRA_INCLUDE_FILES "sys/mman.h") | ||||
|   CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP) | ||||
|   IF(HAVE_MMAP) | ||||
|     ADD_DEFINITIONS(-DHAVE_MMAP=1) | ||||
|   ENDIF(HAVE_MMAP) | ||||
|   # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html | ||||
|   ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64) | ||||
|   CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN) | ||||
|   IF(HAVE_SHM_OPEN) | ||||
|     ADD_DEFINITIONS(-DHAVE_SHM_OPEN=1) | ||||
|   ENDIF(HAVE_SHM_OPEN) | ||||
|   CHECK_FUNCTION_EXISTS(shm_unlink HAVE_SHM_UNLINK) | ||||
|   IF(HAVE_SHM_UNLINK) | ||||
|     ADD_DEFINITIONS(-DHAVE_SHM_UNLINK=1) | ||||
|   ENDIF(HAVE_SHM_UNLINK) | ||||
|   CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE) | ||||
|   IF(HAVE_MALLOC_USABLE_SIZE) | ||||
|     ADD_DEFINITIONS(-DHAVE_MALLOC_USABLE_SIZE=1) | ||||
|   ENDIF(HAVE_MALLOC_USABLE_SIZE) | ||||
| ENDIF(UNIX) | ||||
|  | ||||
| if(NOT MSVC) | ||||
|   list(APPEND ATen_CPU_DEPENDENCY_LIBS m) | ||||
| endif() | ||||
|  | ||||
| if(MKLDNN_FOUND) | ||||
|   list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES}) | ||||
| endif(MKLDNN_FOUND) | ||||
|  | ||||
| list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo) | ||||
|  | ||||
| if(NOT MSVC AND NOT EMSCRIPTEN) | ||||
|   # Preserve values for the main build | ||||
|   set(__aten_sleef_build_shared_libs ${BUILD_SHARED_LIBS}) | ||||
|   set(__aten_sleef_build_tests ${BUILD_TESTS}) | ||||
|  | ||||
|   # Unset our restrictive C++ flags here and reset them later. | ||||
|   # Remove this once we use proper target_compile_options. | ||||
|   set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) | ||||
|   set(CMAKE_CXX_FLAGS) | ||||
|  | ||||
|   set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build sleef static" FORCE) | ||||
|   set(BUILD_DFT OFF CACHE BOOL "Don't build sleef DFT lib" FORCE) | ||||
|   set(BUILD_GNUABI_LIBS OFF CACHE BOOL "Don't build sleef gnuabi libs" FORCE) | ||||
|   set(BUILD_TESTS OFF CACHE BOOL "Don't build sleef tests" FORCE) | ||||
|   add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/sleef" ${CMAKE_BINARY_DIR}/sleef) | ||||
|   set_property(TARGET sleef PROPERTY FOLDER "dependencies") | ||||
|   list(APPEND ATen_THIRD_PARTY_INCLUDE ${CMAKE_BINARY_DIR}/include) | ||||
|   link_directories(${CMAKE_BINARY_DIR}/sleef/lib) | ||||
|   list(APPEND ATen_CPU_DEPENDENCY_LIBS sleef) | ||||
|  | ||||
|   set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS}) | ||||
|  | ||||
|   # Set these back. TODO: Use SLEEF_ to pass these instead | ||||
|   set(BUILD_SHARED_LIBS ${__aten_sleef_build_shared_libs} CACHE BOOL "Build shared libs" FORCE) | ||||
|   set(BUILD_TESTS ${__aten_sleef_build_tests} CACHE BOOL "Build tests" FORCE) | ||||
| endif() | ||||
|  | ||||
| IF(USE_CUDA AND NOT USE_ROCM) | ||||
|   IF ($ENV{ATEN_STATIC_CUDA}) | ||||
|     # CuFFT has a complicated static story (especially around CUDA < 9) because it has device callback support: | ||||
|     # we first have to build a fake lib that links with no device callbacks, | ||||
|     # and then we link against this object file. | ||||
|     # This was recommended by the CuFFT team at NVIDIA. | ||||
|  | ||||
|     # build fake CuFFT lib in build dir | ||||
|     EXECUTE_PROCESS(COMMAND touch ${CMAKE_CURRENT_BINARY_DIR}/empty_file.cc) | ||||
|     if(${CUDA_VERSION_MAJOR} EQUAL "8") | ||||
|       SET(CUFFT_FAKELINK_OPTIONS | ||||
| 	--generate-code arch=compute_35,code=sm_35 | ||||
| 	--generate-code arch=compute_50,code=sm_50 | ||||
| 	--generate-code arch=compute_60,code=sm_60) | ||||
|     elseif(${CUDA_VERSION_MAJOR} EQUAL "9") | ||||
|       SET(CUFFT_FAKELINK_OPTIONS | ||||
| 	--generate-code arch=compute_35,code=sm_35 | ||||
| 	--generate-code arch=compute_50,code=sm_50 | ||||
| 	--generate-code arch=compute_60,code=sm_60 | ||||
| 	--generate-code arch=compute_70,code=sm_70) | ||||
|     else() | ||||
|       MESSAGE(FATAL_ERROR "Unhandled major cuda version ${CUDA_VERSION_MAJOR}") | ||||
|     endif() | ||||
|     ADD_CUSTOM_COMMAND( | ||||
|       OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a | ||||
|       COMMAND "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" -o ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a -Xcompiler -fPIC | ||||
|       ${CUFFT_FAKELINK_OPTIONS} | ||||
|       --device-link ${CMAKE_CURRENT_BINARY_DIR}/empty_file.cc -lcufft_static -lculibos | ||||
|       ) | ||||
|     ADD_CUSTOM_TARGET(FAKELINKED_CUFFT_TARGET DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a) | ||||
|     add_library(FAKELINKED_CUFFT STATIC IMPORTED GLOBAL) | ||||
|     add_dependencies(FAKELINKED_CUFFT FAKELINKED_CUFFT_TARGET) | ||||
|     set_target_properties(FAKELINKED_CUFFT PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a) | ||||
|  | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|       ${CUDA_LIBRARIES} | ||||
|       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusparse_static.a | ||||
|       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a | ||||
|       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a | ||||
|       FAKELINKED_CUFFT | ||||
|       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static.a | ||||
|       ) | ||||
|   ELSE() | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|       ${CUDA_LIBRARIES} | ||||
|       ${CUDA_cusparse_LIBRARY} | ||||
|       ${CUDA_curand_LIBRARY}) | ||||
|   ENDIF() | ||||
|  | ||||
|   if(CUDNN_FOUND) | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${CUDNN_LIBRARIES}) | ||||
|   endif(CUDNN_FOUND) | ||||
|  | ||||
|   IF(USE_MAGMA) | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${MAGMA_LIBRARIES}) | ||||
|     IF ($ENV{TH_BINARY_BUILD}) | ||||
|       list(APPEND ATen_CUDA_DEPENDENCY_LIBS | ||||
|         "${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}") | ||||
|     ENDIF($ENV{TH_BINARY_BUILD}) | ||||
|   ENDIF(USE_MAGMA) | ||||
|   IF ($ENV{ATEN_STATIC_CUDA}) | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a") | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a") | ||||
|   ENDIF($ENV{ATEN_STATIC_CUDA}) | ||||
| ENDIF() | ||||
|  | ||||
| IF(USE_ROCM) | ||||
|  ### Link in the ROCm libraries BLAS / RNG. | ||||
|  FIND_LIBRARY(ROCBLAS_LIBRARY rocblas HINTS ${ROCBLAS_PATH}/lib) | ||||
|  FIND_LIBRARY(HIPRAND_LIBRARY hiprand HINTS ${HIPRAND_PATH}/lib) | ||||
|  | ||||
|  list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${ROCBLAS_LIBRARY} ${HIPRAND_LIBRARY}) | ||||
| ENDIF() | ||||
|  | ||||
| # Include CPU paths for CUDA as well | ||||
| list(APPEND ATen_CUDA_INCLUDE ${ATen_CPU_INCLUDE}) | ||||
|  | ||||
| # We have two libraries: libATen_cpu.so and libATen_cuda.so, | ||||
| # with libATen_cuda.so depending on libATen_cpu.so.  The CPU library | ||||
| # contains CPU code only.  libATen_cpu.so is invariant to the setting | ||||
| # of USE_CUDA (it always builds the same way); libATen_cuda.so is only | ||||
| # built when USE_CUDA=1 and CUDA is available. | ||||
| set(ATen_CPU_SRCS ${all_cpu_cpp}) | ||||
| if(AT_LINK_STYLE STREQUAL "INTERFACE") | ||||
|   # Source code can't be added to an interface library, so it is | ||||
|   # passed back to be compiled into the containing library | ||||
|   add_library(ATen_cpu INTERFACE) | ||||
|   list(APPEND ATen_CPU_DEPENDENCY_LIBS ATEN_CPU_FILES_GEN_LIB) | ||||
| else() | ||||
|   add_library(ATen_cpu ${AT_LINK_STYLE} ${ATen_CPU_SRCS}) | ||||
|   if (ATen_THIRD_PARTY_INCLUDE) | ||||
|     target_include_directories(ATen_cpu SYSTEM PRIVATE ${ATen_THIRD_PARTY_INCLUDE}) | ||||
|   endif() | ||||
|   target_include_directories(ATen_cpu INTERFACE $<INSTALL_INTERFACE:include>) | ||||
|   target_include_directories(ATen_cpu PRIVATE ${ATen_CPU_INCLUDE}) | ||||
|   target_link_libraries(ATen_cpu PUBLIC ${ATen_CPU_DEPENDENCY_LIBS}) | ||||
|   target_link_libraries(ATen_cpu PRIVATE ATEN_CPU_FILES_GEN_LIB) | ||||
|   caffe2_interface_library(ATen_cpu ATen_cpu_library) | ||||
|   # Set standard properties on the target | ||||
|   aten_set_target_props(ATen_cpu) | ||||
|  | ||||
|   # Make sure these don't get built by parent | ||||
|   set(ATen_CPU_SRCS) | ||||
| endif() | ||||
|  | ||||
| if(USE_CUDA OR USE_ROCM) | ||||
|   set(ATen_CUDA_SRCS ${all_cuda_cpp}) | ||||
|   if(AT_LINK_STYLE STREQUAL "INTERFACE") | ||||
|     # Source code can't be added to an interface library, so it is | ||||
|     # passed back to be compiled into the containing library | ||||
|     add_library(ATen_cuda INTERFACE) | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS ATEN_CUDA_FILES_GEN_LIB) | ||||
|   else() | ||||
|     # A hack to deal with cuda library dependencies and modern CMake: the | ||||
|     # CUDA_ADD_LIBRARY includes a target_link_libraries, and as a result, | ||||
|     # one cannot use PUBLIC/PRIVATE/INTERFACE for the target anymore. This | ||||
|     # hack adds the PRIVATE keywords to CUDA_LIBRARIES so we can deal with | ||||
|     # it. We will then manually add the cudart library as interface libs. | ||||
|     set(__tmp ${CUDA_LIBRARIES}) | ||||
|     set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) | ||||
|     torch_cuda_based_add_library(ATen_cuda ${AT_LINK_STYLE} ${ATen_CUDA_SRCS}) | ||||
|     set(CUDA_LIBRARIES ${__tmp}) | ||||
|     target_link_libraries(ATen_cuda INTERFACE caffe2::cudart) | ||||
|  | ||||
|     target_include_directories( | ||||
|         ATen_cuda INTERFACE $<INSTALL_INTERFACE:include>) | ||||
|     target_include_directories( | ||||
|         ATen_cuda PRIVATE ${ATen_THIRD_PARTY_INCLUDE}) | ||||
|     target_include_directories( | ||||
|         ATen_cuda PRIVATE ${ATen_CUDA_INCLUDE}) | ||||
|     target_link_libraries( | ||||
|         ATen_cuda PRIVATE ${ATen_CUDA_DEPENDENCY_LIBS} ATEN_CUDA_FILES_GEN_LIB) | ||||
|  | ||||
|     # These public dependencies must go after the previous dependencies, as the | ||||
|     # order of the libraries in the linker call matters here when statically | ||||
|     # linking; libculibos and cublas must be last. | ||||
|     target_link_libraries( | ||||
|         ATen_cuda PUBLIC ATen_cpu ${ATen_PUBLIC_CUDA_DEPENDENCY_LIBS}) | ||||
|  | ||||
|     # Set standard properties on the target | ||||
|     aten_set_target_props(ATen_cuda) | ||||
|  | ||||
|     caffe2_interface_library(ATen_cuda ATen_cuda_library) | ||||
|  | ||||
|     # Make sure these don't get built by parent | ||||
|     set(ATen_CUDA_SRCS) | ||||
|   endif() | ||||
| endif() | ||||
|  | ||||
| if(NOT AT_LINK_STYLE STREQUAL "INTERFACE") | ||||
|   if(USE_CUDA) | ||||
|     if (NOT $ENV{ATEN_STATIC_CUDA}) | ||||
|       cuda_add_cublas_to_target(ATen_cuda) | ||||
|       cuda_add_cufft_to_target(ATen_cuda) | ||||
|     endif() | ||||
|   endif() | ||||
|  | ||||
|   if(NOT MSVC) | ||||
|     aten_compile_options(ATen_cpu) | ||||
|     if(USE_CUDA OR USE_ROCM) | ||||
|       aten_compile_options(ATen_cuda) | ||||
|     endif() | ||||
|   endif() | ||||
|  | ||||
|   if(NOT ${CMAKE_VERSION} VERSION_LESS "3.1") | ||||
|     set_property(TARGET ATen_cpu PROPERTY CXX_STANDARD 11) | ||||
|     if(USE_CUDA OR USE_ROCM) | ||||
|       set_property(TARGET ATen_cuda PROPERTY CXX_STANDARD 11) | ||||
|     endif() | ||||
|   endif() | ||||
| endif() | ||||
|  | ||||
| SET(ATEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${AT_INSTALL_INCLUDE_DIR}") | ||||
| CONFIGURE_FILE(ATenConfig.cmake.in "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake") | ||||
| INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/ATenConfig.cmake" | ||||
|   DESTINATION "${AT_INSTALL_SHARE_DIR}/cmake/ATen") | ||||
|  | ||||
| # https://stackoverflow.com/questions/11096471/how-can-i-install-a-hierarchy-of-files-using-cmake | ||||
| FOREACH(HEADER ${base_h} ${ATen_CORE_HEADERS} ${cuda_h} ${cudnn_h}) | ||||
|   string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" HEADER_SUB ${HEADER}) | ||||
|   GET_FILENAME_COMPONENT(DIR ${HEADER_SUB} DIRECTORY) | ||||
|   INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/${DIR}) | ||||
| ENDFOREACH() | ||||
| FOREACH(HEADER ${generated_h} ${cuda_generated_h}) | ||||
|   # NB: Assumed to be flat | ||||
|   INSTALL(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen) | ||||
| ENDFOREACH() | ||||
| INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml | ||||
|   DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen) | ||||
|  | ||||
| if(ATEN_NO_TEST) | ||||
|   message("disabling tests because ATEN_NO_TEST is set") | ||||
| else() | ||||
|   add_subdirectory(test) | ||||
| endif() | ||||
|  | ||||
| # Pass source, includes, and libs to parent | ||||
| set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE) | ||||
| set(ATen_CPU_INCLUDE ${ATen_CPU_INCLUDE} PARENT_SCOPE) | ||||
| set(ATen_THIRD_PARTY_INCLUDE ${ATen_THIRD_PARTY_INCLUDE} PARENT_SCOPE) | ||||
| set(ATen_CUDA_INCLUDE ${ATen_CUDA_INCLUDE} PARENT_SCOPE) | ||||
| set(ATen_CPU_DEPENDENCY_LIBS ${ATen_CPU_DEPENDENCY_LIBS} PARENT_SCOPE) | ||||
| set(ATen_CUDA_DEPENDENCY_LIBS ${ATen_CUDA_DEPENDENCY_LIBS} PARENT_SCOPE) | ||||
| @ -1,567 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ATen/Parallel.h" | ||||
| #include "ATen/TensorUtils.h" | ||||
| #include <limits> | ||||
| #include <utility> | ||||
| #include <cstring> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| /* | ||||
| [collapse dims] Updates sizes and strides to reflect a "collapse" of | ||||
| the info, possibly excluding the optional excludeDim. A "collapsed" version | ||||
| of the info is the fewest dims that order the tensor's elements in the same | ||||
| way as the original info. If excludeDim is specified, the collapse is the | ||||
| fewest dims that order the tensor's elements as the original and preserve the | ||||
| excluded dimension, unless the tensor collapses to a point. | ||||
|  | ||||
| This function returns a pair of values. | ||||
|  | ||||
| 1) The (new) index of the preserved dimension if excludeDim is | ||||
| specified. 0 if the tensor is collapsed to a point. -1 | ||||
| otherwise. | ||||
|  | ||||
| 2) The new number of dimensions. | ||||
| */ | ||||
| template <typename T> | ||||
| inline std::pair<int64_t, int64_t> collapse_dims( | ||||
|     T* sizes, | ||||
|     T* strides, | ||||
|     int64_t dims, | ||||
|     const int excludeDim = -1) { | ||||
|   AT_CHECK( | ||||
|       excludeDim >= -1 && excludeDim < dims, | ||||
|       "expected excluded dim between -1 and dims - 1"); | ||||
|  | ||||
|   int64_t stopDim = (excludeDim == -1) ? dims : excludeDim; | ||||
|   int64_t newIndex = -1; | ||||
|   int64_t oldIndex = 0; | ||||
|   int64_t remappedExcludedDim = -1; | ||||
|  | ||||
|   while (oldIndex < dims) { | ||||
|     // Finds a dimension to collapse into | ||||
|     for (; oldIndex < stopDim; ++oldIndex) { | ||||
|       if (sizes[oldIndex] == 1) { | ||||
|         continue; | ||||
|       } | ||||
|  | ||||
|       ++newIndex; | ||||
|       sizes[newIndex] = sizes[oldIndex]; | ||||
|       strides[newIndex] = strides[oldIndex]; | ||||
|       ++oldIndex; | ||||
|       break; | ||||
|     } | ||||
|  | ||||
|     // Collapses dims | ||||
|     for (; oldIndex < stopDim; ++oldIndex) { | ||||
|       if (sizes[oldIndex] == 1) { | ||||
|         continue; | ||||
|       } | ||||
|  | ||||
|       if (strides[newIndex] == sizes[oldIndex] * strides[oldIndex]) { | ||||
|         sizes[newIndex] *= sizes[oldIndex]; | ||||
|         strides[newIndex] = strides[oldIndex]; | ||||
|       } else { | ||||
|         ++newIndex; | ||||
|         sizes[newIndex] = sizes[oldIndex]; | ||||
|         strides[newIndex] = strides[oldIndex]; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Handles excludeDim being set (oldIndex == excludeDim) | ||||
|     if (oldIndex != dims) { | ||||
|       // Preserves excluded dimension | ||||
|       ++newIndex; | ||||
|       sizes[newIndex] = sizes[oldIndex]; | ||||
|       strides[newIndex] = strides[oldIndex]; | ||||
|       remappedExcludedDim = newIndex; | ||||
|  | ||||
|       // Restarts iteration after excludeDim | ||||
|       ++oldIndex; | ||||
|       stopDim = dims; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   // Handles special case of all dims size 1 | ||||
|   if (newIndex == -1 || (newIndex == 0 && sizes[0] == 1)) { | ||||
|     dims = 1; | ||||
|     sizes[0] = 1; | ||||
|     strides[0] = 1; | ||||
|  | ||||
|     return std::pair<int64_t, int64_t>(0, 1); | ||||
|   } | ||||
|  | ||||
|   dims = newIndex + 1; | ||||
|   return std::pair<int64_t, int64_t>(remappedExcludedDim, dims); | ||||
| } | ||||
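To make the merge rule concrete, here is a minimal standalone sketch (an illustration only, not the ATen implementation above; it ignores size-1 dimensions and excludeDim) showing how a contiguous 2x3x4 tensor collapses to a single dimension:

    #include <cstdint>
    #include <iostream>

    // Simplified sketch of the core merge rule: dim i folds into the
    // current output dim when strides[out] == sizes[i] * strides[i].
    int64_t collapse_sketch(int64_t* sizes, int64_t* strides, int64_t dims) {
      int64_t out = 0;
      for (int64_t i = 1; i < dims; ++i) {
        if (strides[out] == sizes[i] * strides[i]) {
          sizes[out] *= sizes[i];   // fold dim i into dim out
          strides[out] = strides[i];
        } else {
          ++out;
          sizes[out] = sizes[i];
          strides[out] = strides[i];
        }
      }
      return out + 1;  // new number of dims
    }

    int main() {
      int64_t sizes[] = {2, 3, 4};    // contiguous 2x3x4 tensor
      int64_t strides[] = {12, 4, 1};
      int64_t d = collapse_sketch(sizes, strides, 3);
      std::cout << d << " " << sizes[0] << " " << strides[0] << "\n";  // 1 24 1
    }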
|  | ||||
| /* | ||||
|  * The basic strategy for apply is as follows: | ||||
|  * | ||||
|  * 1. Starting with the outermost index, loop until we reach a dimension where | ||||
|  * the data is no longer contiguous, i.e. the stride at that dimension is not | ||||
|  * equal to the size of the tensor defined by the outer dimensions. Let's call | ||||
|  * this outer (contiguous) tensor A. Note that if the Tensor is contiguous, then | ||||
|  * A is equal to the entire Tensor. Let's call the inner tensor B. | ||||
|  * | ||||
|  * 2. We loop through the indices in B, starting at its outermost dimension. For | ||||
|  * example, if B is a 2x2 matrix, then we do: | ||||
|  * | ||||
|  * B[0][0] | ||||
|  * B[0][1] | ||||
|  * B[1][0] | ||||
|  * B[1][1] | ||||
|  * | ||||
|  * We set the offset into the underlying storage as (storageOffset + stride_B * | ||||
|  * index_B), i.e. basically we compute the offset into the storage as we would | ||||
|  * normally for a Tensor. But because we are guaranteed the subsequent data is | ||||
|  * contiguous in memory, we can simply loop for as many iterations as A has | ||||
|  * elements and perform the operation, without having to follow the order | ||||
|  * described by the strides of A. | ||||
|  * | ||||
|  * 3. As an optimization, we merge dimensions of A that are contiguous in | ||||
|  * memory. For example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, | ||||
|  * then the first two dimensions can be merged for the purposes of APPLY, | ||||
|  * reducing the number of nested loops. | ||||
|  */ | ||||
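As a concrete illustration of step 2, the hypothetical standalone sketch below computes the storage offsets visited for a 2x2 B with contiguous strides; the offsets come out consecutive, which is what lets the inner loop run flat:

    #include <cstdint>
    #include <iostream>

    int main() {
      int64_t storage_offset = 0;
      int64_t sizes[2] = {2, 2};
      int64_t strides[2] = {2, 1};  // contiguous 2x2
      for (int64_t i = 0; i < sizes[0]; ++i)
        for (int64_t j = 0; j < sizes[1]; ++j)
          std::cout << storage_offset + i * strides[0] + j * strides[1] << " ";
      std::cout << "\n";  // prints: 0 1 2 3
    }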
|  | ||||
| inline Tensor sort_strides(Tensor& tensor_) { | ||||
|   IntList strides = tensor_.strides(); | ||||
|   std::vector<int64_t> indices; | ||||
|   indices.reserve(tensor_.ndimension()); | ||||
|   for (int64_t i = 0; i < tensor_.ndimension(); i++) { | ||||
|     indices.push_back(i); | ||||
|   } | ||||
|   std::sort(indices.begin(), indices.end(), [&strides](int64_t i1, int64_t i2) { | ||||
|     return strides[i1] > strides[i2]; | ||||
|   }); | ||||
|   Tensor tensor = tensor_.permute(indices); | ||||
|   return tensor; | ||||
| } | ||||
|  | ||||
| template <typename T, int N> | ||||
| struct strided_tensor_iter_fixed { | ||||
|  public: | ||||
|   T* data_ = NULL; | ||||
|   int64_t dim_ = 0; | ||||
|  | ||||
|   int64_t counter_[N] = {0}; | ||||
|   int64_t sizes_[N] = {0}; | ||||
|   int64_t strides_[N] = {0}; | ||||
|  | ||||
|   strided_tensor_iter_fixed(strided_tensor_iter_fixed const&) = delete; | ||||
|   void operator=(strided_tensor_iter_fixed const& x) = delete; | ||||
|   strided_tensor_iter_fixed(strided_tensor_iter_fixed&&) = default; | ||||
|   strided_tensor_iter_fixed(Tensor& tensor, bool sort_strides = false) | ||||
|       : data_(tensor.data<T>()) { | ||||
|     std::memset(counter_, 0, sizeof(int64_t) * N); | ||||
|     std::memcpy( | ||||
|         sizes_, tensor.sizes().data(), tensor.ndimension() * sizeof(int64_t)); | ||||
|     std::memcpy( | ||||
|         strides_, | ||||
|         tensor.strides().data(), | ||||
|         tensor.ndimension() * sizeof(int64_t)); | ||||
|     dim_ = std::get<1>(collapse_dims(sizes_, strides_, tensor.ndimension())); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template <typename T> | ||||
| struct strided_tensor_iter { | ||||
|  private: | ||||
|  public: | ||||
|   T* data_ = NULL; | ||||
|   int64_t dim_; | ||||
|  | ||||
|   std::vector<int64_t> counter_; | ||||
|   std::vector<int64_t> sizes_; | ||||
|   std::vector<int64_t> strides_; | ||||
|  | ||||
|   strided_tensor_iter(strided_tensor_iter const&) = delete; | ||||
|   void operator=(strided_tensor_iter const& x) = delete; | ||||
|   strided_tensor_iter(strided_tensor_iter&&) = default; | ||||
|   strided_tensor_iter(Tensor& tensor) | ||||
|       : data_(tensor.data<T>()), | ||||
|         dim_(tensor.ndimension()), | ||||
|         counter_(dim_, 0), | ||||
|         sizes_(tensor.sizes().vec()), | ||||
|         strides_(tensor.strides().vec()) { | ||||
|     dim_ = std::get<1>(collapse_dims(sizes_.data(), strides_.data(), dim_)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| inline bool _all_equal_numel(at::ArrayRef<Tensor> tensors) { | ||||
|   if (tensors.size() == 0) | ||||
|     return true; | ||||
|   int64_t all_numel = tensors[0].numel(); | ||||
|   for (size_t i = 1; i < tensors.size(); i++) { | ||||
|     if (tensors[i].numel() != all_numel) | ||||
|       return false; | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| inline std::string _all_equal_numel_error(at::ArrayRef<Tensor> tensors) { | ||||
|   std::ostringstream oss; | ||||
|   oss << "inconsistent tensor size, expected "; | ||||
|   for (size_t i = 0; i < tensors.size() - 1; i++) { | ||||
|     oss << tensors[i].sizes() << ", "; | ||||
|   } | ||||
|   oss << "and " << tensors[tensors.size() - 1] | ||||
|       << " to have the same number of elements, but got "; | ||||
|   for (size_t i = 0; i < tensors.size() - 1; i++) { | ||||
|     oss << tensors[i].numel() << ", "; | ||||
|   } | ||||
|   oss << "and " << tensors[tensors.size() - 1].numel() | ||||
|       << " elements respectively"; | ||||
|   return oss.str(); | ||||
| } | ||||
|  | ||||
| inline bool _apply_preamble(ArrayRef<Tensor> tensors) { | ||||
|   checkBackend("CPU_tensor_apply", tensors, Backend::CPU); | ||||
|   if (!_all_equal_numel(tensors)) | ||||
|     throw std::runtime_error(_all_equal_numel_error(tensors)); | ||||
|   // An empty tensor has no elements | ||||
|   for (auto& t : tensors) | ||||
|     if (t.numel() == 0) | ||||
|       return false; | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| inline int64_t _max_dim_tensors(ArrayRef<Tensor> tensors) { | ||||
|   int64_t dim = 0; | ||||
|   for (auto& t : tensors) | ||||
|     dim = std::max(dim, t.ndimension()); | ||||
|   return dim; | ||||
| } | ||||
|  | ||||
| inline void iterate(int64_t size){}; | ||||
|  | ||||
| template <typename Arg, typename... Args> | ||||
| inline void iterate(int64_t size, Arg& iter, Args&... iter_tail) { | ||||
|   iter.counter_[iter.dim_ - 1] += size; | ||||
|   iter.data_ = iter.data_ + size * iter.strides_[iter.dim_ - 1]; | ||||
|   iterate(size, iter_tail...); | ||||
| } | ||||
|  | ||||
| inline bool iterate_continue() { | ||||
|   return true; | ||||
| }; | ||||
|  | ||||
| template <typename Arg, typename... Args> | ||||
| inline bool iterate_continue(Arg& iter, Args&... iter_tail) { | ||||
|   return iter.counter_[iter.dim_ - 1] < iter.sizes_[iter.dim_ - 1] && | ||||
|       iterate_continue(iter_tail...); | ||||
| } | ||||
|  | ||||
| inline int64_t max_iterate_size() { | ||||
|   return std::numeric_limits<int64_t>::max(); | ||||
| }; | ||||
|  | ||||
| template <typename Arg, typename... Args> | ||||
| inline int64_t max_iterate_size(Arg& iter, Args&... iter_tail) { | ||||
|   return std::min( | ||||
|       (iter.sizes_[iter.dim_ - 1] - iter.counter_[iter.dim_ - 1]), | ||||
|       max_iterate_size(iter_tail...)); | ||||
| } | ||||
|  | ||||
| inline void iterate_overflow(){}; | ||||
|  | ||||
| template <typename Arg, typename... Args> | ||||
| inline void iterate_overflow(Arg& iter, Args&... iter_tail) { | ||||
|   if (iter.counter_[iter.dim_ - 1] == iter.sizes_[iter.dim_ - 1]) { | ||||
|     for (int64_t i = iter.dim_ - 1; i > 0; i--) { | ||||
|       if (iter.counter_[i] == iter.sizes_[i]) { | ||||
|         iter.counter_[i] = 0; | ||||
|         iter.counter_[i - 1]++; | ||||
|         iter.data_ = iter.data_ - (iter.sizes_[i] * iter.strides_[i]) + | ||||
|             iter.strides_[i - 1]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   iterate_overflow(iter_tail...); | ||||
| } | ||||
|  | ||||
| inline void forward(int64_t offset){}; | ||||
|  | ||||
| template <typename Arg, typename... Args> | ||||
| inline void forward(int64_t offset, Arg& iter, Args&... iter_tail) { | ||||
|   int64_t multi = offset; | ||||
|   for (int64_t i = iter.dim_ - 1; i >= 0; i--) { | ||||
|     int64_t inc = multi % iter.sizes_[i]; | ||||
|     multi = multi / iter.sizes_[i]; | ||||
|     iter.data_ = iter.data_ + inc * iter.strides_[i]; | ||||
|     iter.counter_[i] += inc; | ||||
|   } | ||||
|   forward(offset, iter_tail...); | ||||
| } | ||||
|  | ||||
| inline int64_t max_dim() { | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| template <typename Arg, typename... Args> | ||||
| inline int64_t max_dim(Arg& iter, Args&... iter_tail) { | ||||
|   return std::max(iter.dim_, max_dim(iter_tail...)); | ||||
| } | ||||
|  | ||||
| inline void apply_op(){}; | ||||
|  | ||||
| template <typename Op, typename... Args> | ||||
| inline void | ||||
| apply_op(int64_t numel, int64_t offset, const Op& op, Args... iters) { | ||||
|   // For 0-dim tensors | ||||
|   if (numel == 1 && max_dim(iters...) == 0) { | ||||
|     op(*iters.data_...); | ||||
|     return; | ||||
|   } | ||||
|   if (offset > 0) | ||||
|     forward(offset, iters...); | ||||
|   // Splitting this into chunks helps the compiler create faster assembly | ||||
|   for (int64_t i = 0; i < numel;) { | ||||
|     for (; iterate_continue(iters...) && i < numel;) { | ||||
|       op(*iters.data_...); | ||||
|       iterate(1, iters...); | ||||
|       i++; | ||||
|     } | ||||
|     iterate_overflow(iters...); | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| inline void apply_kernel(){}; | ||||
|  | ||||
| // TODO: Deal elegantly with 0-dim tensors. iters.strides_ of 0-dim | ||||
| // strided_tensor_iter will be of size 0 for dim 0 and iters.strides_[iters.dim_ | ||||
| // - 1] will index at -1. C++14 integer_sequence could be of use here. | ||||
| template <typename Op, typename... Args> | ||||
| inline void | ||||
| apply_kernel(int64_t numel, int64_t offset, const Op& op, Args... iters) { | ||||
|   if (offset > 0) | ||||
|     forward(offset, iters...); | ||||
|   int64_t size = std::min(numel, max_iterate_size(iters...)); | ||||
|   op(size, iters.data_..., iters.strides_[iters.dim_ - 1]...); | ||||
|   iterate(size, iters...); | ||||
|   iterate_overflow(iters...); | ||||
|   int64_t i = size; | ||||
|   size = std::min(numel, max_iterate_size(iters...)); | ||||
|   for (; i < numel;) { | ||||
|     op(size, iters.data_..., iters.strides_[iters.dim_ - 1]...); | ||||
|     iterate(size, iters...); | ||||
|     i += size; | ||||
|     iterate_overflow(iters...); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename scalar1, typename scalar2, typename Op> | ||||
| inline void | ||||
| CPU_tensor_parallel_kernel_apply2(Tensor tensor1, Tensor tensor2, const Op op) { | ||||
|   if (!_apply_preamble({tensor1, tensor2})) | ||||
|     return; | ||||
|   if (tensor1.numel() == 1) { | ||||
|     op(1, tensor1.data<scalar1>(), tensor2.data<scalar2>(), 0, 0); | ||||
|     return; | ||||
|   } | ||||
|   if (tensor1.ndimension() < 8 && tensor2.ndimension() < 8) { | ||||
|     parallel_for( | ||||
|         0, | ||||
|         tensor1.numel(), | ||||
|         1, | ||||
|         [&tensor1, &tensor2, &op](int64_t begin, int64_t end) { | ||||
|           apply_kernel( | ||||
|               end - begin, | ||||
|               begin, | ||||
|               op, | ||||
|               strided_tensor_iter_fixed<scalar1, 8>(tensor1), | ||||
|               strided_tensor_iter_fixed<scalar2, 8>(tensor2)); | ||||
|         }); | ||||
|   } else { | ||||
|     parallel_for( | ||||
|         0, | ||||
|         tensor1.numel(), | ||||
|         1, | ||||
|         [&tensor1, &tensor2, &op](int64_t begin, int64_t end) { | ||||
|           apply_kernel( | ||||
|               end - begin, | ||||
|               begin, | ||||
|               op, | ||||
|               strided_tensor_iter<scalar1>(tensor1), | ||||
|               strided_tensor_iter<scalar2>(tensor2)); | ||||
|         }); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /* | ||||
|   Apply a pointwise operator to a sequence of tensors. | ||||
|  | ||||
|   The calling convention for op is a function/functor that takes the same | ||||
|   number of scalar references as the number of given tensors (apply_op | ||||
|   dereferences each iterator's data pointer). For example, to compute | ||||
|   a = b * c, op would be of the form: | ||||
|   [](scalar& a_val, const scalar& b_val, const scalar& c_val) { a_val = | ||||
|   b_val * c_val; }; | ||||
| */ | ||||
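A hedged usage sketch of this calling convention (assumes ATen headers and a built library; not part of this file):

    #include <ATen/ATen.h>
    #include <ATen/CPUApplyUtils.h>

    int main() {
      at::Tensor a = at::zeros({2, 3}, at::kFloat);
      at::Tensor b = at::ones({2, 3}, at::kFloat);
      at::Tensor c = at::ones({2, 3}, at::kFloat);
      // a = b * c, element-wise.
      at::CPU_tensor_apply3<float, float, float>(
          a, b, c,
          [](float& a_val, float& b_val, float& c_val) { a_val = b_val * c_val; });
    }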
|  | ||||
| template <typename scalar1, typename Op> | ||||
| inline void CPU_tensor_apply1(Tensor tensor1, const Op op) { | ||||
|   if (!_apply_preamble({tensor1})) | ||||
|     return; | ||||
|   if (tensor1.ndimension() < 8) { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter_fixed<scalar1, 8>(tensor1, true)); | ||||
|   } else { | ||||
|     apply_op(tensor1.numel(), 0, op, strided_tensor_iter<scalar1>(tensor1)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename scalar1, typename scalar2, typename Op> | ||||
| inline void CPU_tensor_apply2(Tensor tensor1, Tensor tensor2, const Op op) { | ||||
|   if (!_apply_preamble({tensor1, tensor2})) | ||||
|     return; | ||||
|   if (_max_dim_tensors({tensor1, tensor2}) <= 8) { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter_fixed<scalar1, 8>(tensor1), | ||||
|         strided_tensor_iter_fixed<scalar2, 8>(tensor2)); | ||||
|   } else { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter<scalar1>(tensor1), | ||||
|         strided_tensor_iter<scalar2>(tensor2)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename scalar1, typename scalar2, typename scalar3, typename Op> | ||||
| inline void | ||||
| CPU_tensor_apply3(Tensor tensor1, Tensor tensor2, Tensor tensor3, const Op op) { | ||||
|   if (!_apply_preamble({tensor1, tensor2, tensor3})) | ||||
|     return; | ||||
|   if (_max_dim_tensors({tensor1, tensor2, tensor3}) <= 8) { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter_fixed<scalar1, 8>(tensor1), | ||||
|         strided_tensor_iter_fixed<scalar2, 8>(tensor2), | ||||
|         strided_tensor_iter_fixed<scalar3, 8>(tensor3)); | ||||
|   } else { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter<scalar1>(tensor1), | ||||
|         strided_tensor_iter<scalar2>(tensor2), | ||||
|         strided_tensor_iter<scalar3>(tensor3)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template < | ||||
|     typename scalar1, | ||||
|     typename scalar2, | ||||
|     typename scalar3, | ||||
|     typename scalar4, | ||||
|     typename Op> | ||||
| inline void CPU_tensor_apply4( | ||||
|     Tensor tensor1, | ||||
|     Tensor tensor2, | ||||
|     Tensor tensor3, | ||||
|     Tensor tensor4, | ||||
|     const Op op) { | ||||
|   if (!_apply_preamble({tensor1, tensor2, tensor3, tensor4})) | ||||
|     return; | ||||
|   if (_max_dim_tensors({tensor1, tensor2, tensor3, tensor4}) <= 8) { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter_fixed<scalar1, 8>(tensor1), | ||||
|         strided_tensor_iter_fixed<scalar2, 8>(tensor2), | ||||
|         strided_tensor_iter_fixed<scalar3, 8>(tensor3), | ||||
|         strided_tensor_iter_fixed<scalar4, 8>(tensor4)); | ||||
|   } else { | ||||
|     apply_op( | ||||
|         tensor1.numel(), | ||||
|         0, | ||||
|         op, | ||||
|         strided_tensor_iter<scalar1>(tensor1), | ||||
|         strided_tensor_iter<scalar2>(tensor2), | ||||
|         strided_tensor_iter<scalar3>(tensor3), | ||||
|         strided_tensor_iter<scalar4>(tensor4)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename scalar1, typename Op> | ||||
| inline void CPU_tensor_parallel_apply1( | ||||
|     Tensor tensor1, | ||||
|     const Op op, | ||||
|     int64_t grain_size = internal::GRAIN_SIZE) { | ||||
|   if (!_apply_preamble({tensor1})) | ||||
|     return; | ||||
|   if (tensor1.ndimension() < 8) { | ||||
|     parallel_for( | ||||
|         0, | ||||
|         tensor1.numel(), | ||||
|         grain_size, | ||||
|         [&tensor1, &op](int64_t begin, int64_t end) { | ||||
|           apply_op( | ||||
|               end - begin, | ||||
|               begin, | ||||
|               op, | ||||
|               strided_tensor_iter_fixed<scalar1, 8>(tensor1, true)); | ||||
|         }); | ||||
|   } else { | ||||
|     parallel_for( | ||||
|         0, | ||||
|         tensor1.numel(), | ||||
|         grain_size, | ||||
|         [&tensor1, &op](int64_t begin, int64_t end) { | ||||
|           apply_op( | ||||
|               end - begin, begin, op, strided_tensor_iter<scalar1>(tensor1)); | ||||
|         }); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename scalar1, typename scalar2, typename Op> | ||||
| inline void CPU_tensor_parallel_apply2( | ||||
|     Tensor tensor1, | ||||
|     Tensor tensor2, | ||||
|     const Op op, | ||||
|     int64_t grain_size = internal::GRAIN_SIZE) { | ||||
|   if (!_apply_preamble({tensor1, tensor2})) | ||||
|     return; | ||||
|   if (tensor1.ndimension() < 8 && tensor2.ndimension() < 8) { | ||||
|     parallel_for( | ||||
|         0, | ||||
|         tensor1.numel(), | ||||
|         grain_size, | ||||
|         [&tensor1, &tensor2, &op](int64_t begin, int64_t end) { | ||||
|           apply_op( | ||||
|               end - begin, | ||||
|               begin, | ||||
|               op, | ||||
|               strided_tensor_iter_fixed<scalar1, 8>(tensor1), | ||||
|               strided_tensor_iter_fixed<scalar2, 8>(tensor2)); | ||||
|         }); | ||||
|   } else { | ||||
|     parallel_for( | ||||
|         0, | ||||
|         tensor1.numel(), | ||||
|         grain_size, | ||||
|         [&tensor1, &tensor2, &op](int64_t begin, int64_t end) { | ||||
|           apply_op( | ||||
|               end - begin, | ||||
|               begin, | ||||
|               op, | ||||
|               strided_tensor_iter<scalar1>(tensor1), | ||||
|               strided_tensor_iter<scalar2>(tensor2)); | ||||
|         }); | ||||
|   } | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
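The parallel variants use the same calling convention; a hedged sketch (same assumptions as above) that doubles a tensor in parallel, relying on the default grain size:

    #include <ATen/ATen.h>
    #include <ATen/CPUApplyUtils.h>

    int main() {
      at::Tensor t = at::ones({1024, 1024}, at::kFloat);
      at::CPU_tensor_parallel_apply1<float>(t, [](float& x) { x *= 2; });
    }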
| @ -1,16 +0,0 @@ | ||||
| #include <ATen/CPUGeneral.h> | ||||
| #include <atomic> | ||||
| #include <memory> | ||||
| #include <thread> | ||||
|  | ||||
| namespace at { | ||||
| // Lock-free atomic; -1 means the thread count has not been set yet | ||||
| std::atomic<int> num_threads(-1); | ||||
|  | ||||
| void set_num_threads(int num_threads_) { | ||||
|   if (num_threads_ >= 0) | ||||
|     num_threads.store(num_threads_); | ||||
| } | ||||
|  | ||||
| int get_num_threads() { return num_threads.load(); } | ||||
| } | ||||
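A small sketch of the resulting semantics: -1 signals "not set", and negative requests are ignored by the setter (illustration, not part of this file):

    #include <ATen/CPUGeneral.h>
    #include <cassert>

    int main() {
      at::set_num_threads(4);
      assert(at::get_num_threads() == 4);
      at::set_num_threads(-2);  // ignored: only values >= 0 are stored
      assert(at::get_num_threads() == 4);
    }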
| @ -1,12 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| // Using AT_API is crucial as otherwise you'll see | ||||
| // linking errors using MSVC | ||||
| // See https://msdn.microsoft.com/en-us/library/a90k134d.aspx | ||||
| // This header adds this if using AT_API | ||||
| #include "ATen/core/ATenGeneral.h" | ||||
|  | ||||
| namespace at { | ||||
| AT_API void set_num_threads(int); | ||||
| AT_API int get_num_threads(); | ||||
| } | ||||
| @ -1,20 +0,0 @@ | ||||
| #include <ATen/CPUTypeDefault.h> | ||||
|  | ||||
| #include <ATen/Context.h> | ||||
| #include <ATen/CPUGenerator.h> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| Allocator* CPUTypeDefault::allocator() const { | ||||
|   return getCPUAllocator(); | ||||
| } | ||||
|  | ||||
| Device CPUTypeDefault::getDeviceFromPtr(void * data) const { | ||||
|   return DeviceType::CPU; | ||||
| } | ||||
|  | ||||
| std::unique_ptr<Generator> CPUTypeDefault::generator() const { | ||||
|   return std::unique_ptr<Generator>(new CPUGenerator(&at::globalContext())); | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
| @ -1,14 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/TypeDefault.h> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| struct AT_API CPUTypeDefault : public TypeDefault { | ||||
|   CPUTypeDefault(TensorTypeId type_id, bool is_variable, bool is_undefined) | ||||
|       : TypeDefault(type_id, is_variable, is_undefined) {} | ||||
|   Allocator* allocator() const override; | ||||
|   Device getDeviceFromPtr(void * data) const override; | ||||
|   std::unique_ptr<Generator> generator() const override; | ||||
| }; | ||||
|  | ||||
| } // namespace at | ||||
| @ -1,18 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ATen/core/Generator.h" | ||||
| #include "ATen/Utils.h" | ||||
| #include "ATen/core/Error.h" | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| template <typename T> | ||||
| static inline T * check_generator(Generator * expr, Generator * defaultValue) { | ||||
|   if (!expr) | ||||
|     expr = defaultValue; | ||||
|   if(auto result = dynamic_cast<T*>(expr)) | ||||
|     return result; | ||||
|   AT_ERROR("Expected a '", typeid(T).name(), "' but found '", typeid(expr).name(), "'"); | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
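A hedged usage sketch (user_gen is a hypothetical, possibly-null Generator* argument): resolve it to a concrete CPUGenerator, falling back to the global default:

    #include <ATen/CheckGenerator.h>
    #include <ATen/Context.h>
    #include <ATen/CPUGenerator.h>

    at::CPUGenerator* resolve(at::Generator* user_gen) {
      return at::check_generator<at::CPUGenerator>(
          user_gen, &at::globalContext().defaultGenerator(at::DeviceType::CPU));
    }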
| @ -1,11 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| // Test these using #if AT_MKL_ENABLED(), not #ifdef, so that it's | ||||
| // obvious if you forgot to include Config.h | ||||
| //    c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined | ||||
| // | ||||
| // DO NOT put the macros for CUDA libraries in this file; they belong in cuda/CUDAConfig.h | ||||
|  | ||||
| #define AT_MKLDNN_ENABLED() @AT_MKLDNN_ENABLED@ | ||||
| #define AT_MKL_ENABLED() @AT_MKL_ENABLED@ | ||||
| #define CAFFE2_STATIC_LINK_CUDA() @CAFFE2_STATIC_LINK_CUDA@ | ||||
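A sketch of why #if rather than #ifdef matters here: if Config.h is not included, AT_MKL_ENABLED is an undefined identifier and `#if AT_MKL_ENABLED()` is a preprocessor error rather than silently false:

    #include "ATen/Config.h"

    bool mkl_available() {
    #if AT_MKL_ENABLED()
      return true;   // MKL-backed code path would go here
    #else
      return false;  // portable fallback
    #endif
    }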
| @ -1,144 +0,0 @@ | ||||
| #include "ATen/Config.h" | ||||
|  | ||||
| #include "Context.h" | ||||
|  | ||||
| #include <ATen/core/TensorOptions.h> | ||||
|  | ||||
| #include <thread> | ||||
| #include <mutex> | ||||
| #include <sstream> | ||||
| #include <string> | ||||
| #include <stdexcept> | ||||
|  | ||||
| #include "ATen/CPUGenerator.h" | ||||
| #include "ATen/RegisterCPU.h" | ||||
| #include "ATen/Tensor.h" | ||||
|  | ||||
| #include "TH/TH.h"  // for USE_LAPACK | ||||
|  | ||||
| #ifdef USE_SSE3 | ||||
| #include <pmmintrin.h> | ||||
| #endif | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| static inline void errorHandler(const char * msg, void * data) { | ||||
|   throw std::runtime_error(msg); | ||||
| } | ||||
| static inline void argErrorHandler(int arg, const char * msg, void * data) { | ||||
|   std::stringstream new_error; | ||||
|   new_error << "invalid argument " << arg << ": " << msg; | ||||
|   throw std::runtime_error(new_error.str()); | ||||
| } | ||||
|  | ||||
| Context::Context() | ||||
| : next_id(static_cast<size_t>(TypeID::NumOptions)) | ||||
| , thc_state(nullptr, [](THCState* p){ /* no-op */ } ) { | ||||
|  | ||||
|   THSetDefaultErrorHandler(errorHandler,nullptr); | ||||
|   THSetDefaultArgErrorHandler(argErrorHandler,nullptr); | ||||
|  | ||||
|   generator_registry[static_cast<int>(DeviceType::CPU)] | ||||
|     .reset(new CPUGenerator(this)); | ||||
|   register_cpu_types(this); | ||||
| } | ||||
|  | ||||
| // TODO: This could be bad juju if someone calls globalContext() in the | ||||
| // destructor of an object with static lifetime. | ||||
| Context & globalContext() { | ||||
|   static Context globalContext_; | ||||
|   return globalContext_; | ||||
| } | ||||
|  | ||||
| // NB: This method reports *purely* whether or not a user requested | ||||
| // that CuDNN be enabled; it doesn't actually say anything about | ||||
| // whether or not CuDNN is actually usable. | ||||
| bool Context::userEnabledCuDNN() const { | ||||
|   return enabled_cudnn; | ||||
| } | ||||
|  | ||||
| void Context::setUserEnabledCuDNN(bool e) { | ||||
|   enabled_cudnn = e; | ||||
| } | ||||
|  | ||||
| bool Context::deterministicCuDNN() const { | ||||
|   return deterministic_cudnn; | ||||
| } | ||||
|  | ||||
| void Context::setDeterministicCuDNN(bool b) { | ||||
|   deterministic_cudnn = b; | ||||
| } | ||||
|  | ||||
| bool Context::benchmarkCuDNN() const { | ||||
|   return benchmark_cudnn; | ||||
| } | ||||
|  | ||||
| void Context::setBenchmarkCuDNN(bool b) { | ||||
|   benchmark_cudnn = b; | ||||
| } | ||||
|  | ||||
| bool Context::hasMKL() const { | ||||
| #if AT_MKL_ENABLED() | ||||
|   return true; | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| bool Context::hasLAPACK() const { | ||||
| #ifdef USE_LAPACK | ||||
|   return true; | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
|  | ||||
| bool Context::setFlushDenormal(bool on) { | ||||
| #ifdef USE_SSE3 | ||||
|   // Setting flush-to-zero (FTZ) flag | ||||
|   _MM_SET_FLUSH_ZERO_MODE(on ? _MM_FLUSH_ZERO_ON | ||||
|                              : _MM_FLUSH_ZERO_OFF); | ||||
|  | ||||
|   // Setting denormals-are-zero (DAZ) flag | ||||
|   _MM_SET_DENORMALS_ZERO_MODE(on ? _MM_DENORMALS_ZERO_ON | ||||
|                                  : _MM_DENORMALS_ZERO_OFF); | ||||
|   return true; | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
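A hedged usage sketch (not part of this file): enabling FTZ/DAZ can speed up workloads that generate many subnormal floats; the call reports false on builds without SSE3 support:

    #include <ATen/ATen.h>
    #include <iostream>

    int main() {
      bool ok = at::globalContext().setFlushDenormal(true);
      std::cout << (ok ? "denormals flushed" : "unsupported build") << "\n";
    }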
|  | ||||
| TypeExtendedInterface& getType(TensorOptions options) { | ||||
|   return globalContext().getType( | ||||
|             options.backend(), options.dtype(), options.is_variable()); | ||||
| } | ||||
|  | ||||
| TypeExtendedInterface& getType(const TensorImpl* impl) { | ||||
|   Backend backend = tensorTypeIdToBackend(impl->type_id()); | ||||
|   return globalContext().getType( | ||||
|             backend, dataTypeToScalarType(impl->dtype().id()), impl->is_variable()); | ||||
| } | ||||
|  | ||||
| TypeExtendedInterface& getType(const Tensor& t) { | ||||
|   return getType(t.unsafeGetTensorImpl()); | ||||
| } | ||||
|  | ||||
| Allocator* getCPUAllocator() { | ||||
|   return getTHDefaultAllocator(); | ||||
| } | ||||
|  | ||||
| struct LegacyTypeInit : public LegacyTypeInitInterface { | ||||
|   LegacyTypeInit(LegacyTypeInitArgs) {} | ||||
|   void initCPU() const override { | ||||
|     globalContext(); | ||||
|   } | ||||
|   void initCUDA() const override { | ||||
|     globalContext().lazyInitCUDA(); | ||||
|   } | ||||
|   void initComplex() const override { | ||||
|     globalContext().lazyInitComplex(); | ||||
|   } | ||||
| }; | ||||
| REGISTER_LEGACY_TYPE_INIT(LegacyTypeInit); | ||||
|  | ||||
| } | ||||
| @ -1,194 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/CPUGeneral.h> | ||||
| #include "ATen/core/ATenGeneral.h" | ||||
| #include "ATen/CUDAStream.h" | ||||
| #include "ATen/core/Generator.h" | ||||
| #include "ATen/Type.h" | ||||
| #include "ATen/TypeExtendedInterface.h" | ||||
| #include "ATen/Utils.h" | ||||
| #include "ATen/core/Error.h" | ||||
| #include "ATen/detail/CUDAHooksInterface.h" | ||||
| #include "ATen/core/VariableHooksInterface.h" | ||||
| #include "ATen/detail/ComplexHooksInterface.h" | ||||
| #include "ATen/core/LegacyTypeDispatch.h" | ||||
|  | ||||
| // This is temporary | ||||
| #include "ATen/core/ATenCoreTest.h" | ||||
|  | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <cstdint> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| struct Tensor; | ||||
|  | ||||
| class AT_API Context { | ||||
| public: | ||||
|   Context(); | ||||
|   TypeExtendedInterface* getNonVariableTypeRaw(Backend p, ScalarType s) { | ||||
|     return static_cast<TypeExtendedInterface*>(globalLegacyTypeDispatch().getNonVariableTypeRaw(p, s)); | ||||
|   } | ||||
|   TypeExtendedInterface * getNonVariableTypeOpt(Backend p, ScalarType s) { | ||||
|     return static_cast<TypeExtendedInterface*>(globalLegacyTypeDispatch().getNonVariableTypeOpt(p, s)); | ||||
|   } | ||||
|   TypeExtendedInterface & getNonVariableType(Backend p, ScalarType s) { | ||||
|     return static_cast<TypeExtendedInterface&>(globalLegacyTypeDispatch().getNonVariableType(p, s)); | ||||
|   } | ||||
|   TypeExtendedInterface & getVariableType(Backend p, ScalarType s) { | ||||
|     return static_cast<TypeExtendedInterface&>(globalLegacyTypeDispatch().getVariableType(p, s)); | ||||
|   } | ||||
|   TypeExtendedInterface & getType(Backend p, ScalarType s, bool is_variable) { | ||||
|     return static_cast<TypeExtendedInterface&>(globalLegacyTypeDispatch().getType(p, s, is_variable)); | ||||
|   } | ||||
|   // The passed in Type must be delete'able | ||||
|   // TODO: Just make it take a unique_ptr | ||||
|   void registerType(Backend b, ScalarType s, Type* t) { | ||||
|     globalLegacyTypeDispatch().registerType(b, s, | ||||
|       LegacyTypeDispatch::TypeUniquePtr{t, LegacyTypeDeleter([](Type* p) { delete p; }) }); | ||||
|   } | ||||
|  | ||||
|   Generator & defaultGenerator(DeviceType device_type) { | ||||
|     initCUDAIfNeeded(device_type); | ||||
|     auto & generator = generator_registry[static_cast<int>(device_type)]; | ||||
|     if(!generator) | ||||
|       AT_ERROR(DeviceTypeName(device_type), " backend type not enabled."); | ||||
|     return *generator; | ||||
|   } | ||||
|   bool hasMKL() const; | ||||
|   bool hasLAPACK() const; | ||||
|   bool hasMAGMA() const { | ||||
|     return detail::getCUDAHooks().hasMAGMA(); | ||||
|   } | ||||
|   bool hasCUDA() const { | ||||
|     return detail::getCUDAHooks().hasCUDA(); | ||||
|   } | ||||
|   bool hasCuDNN() const { | ||||
|     return detail::getCUDAHooks().hasCuDNN(); | ||||
|   } | ||||
|   int64_t current_device() const { | ||||
|     return detail::getCUDAHooks().current_device(); | ||||
|   } | ||||
|   // Defined in the header so that getNonVariableType can inline the | ||||
|   // call_once check; getNonVariableType is called fairly frequently. | ||||
|   THCState* lazyInitCUDA() { | ||||
|     std::call_once(thc_init,[&] { | ||||
|       thc_state = detail::getCUDAHooks().initCUDA(); | ||||
|       generator_registry[static_cast<int>(DeviceType::CUDA)] = | ||||
|         detail::getCUDAHooks().initCUDAGenerator(this); | ||||
|       detail::getCUDAHooks().registerCUDATypes(this); | ||||
|     }); | ||||
|     return thc_state.get(); | ||||
|   } | ||||
|   void lazyInitComplex() { | ||||
|     std::call_once(complex_init_, [&] { | ||||
|       detail::getComplexHooks().registerComplexTypes(this); | ||||
|     }); | ||||
|   } | ||||
|  | ||||
|   THCState* getTHCState() { | ||||
|     // AT_ASSERT(thc_state); | ||||
|     return thc_state.get(); | ||||
|   } | ||||
|  | ||||
|   int getNumGPUs() const { | ||||
|     return detail::getCUDAHooks().getNumGPUs(); | ||||
|   } | ||||
|   size_t freshTypeID() { | ||||
|     return next_id++; | ||||
|   } | ||||
|   bool setFlushDenormal(bool on); | ||||
|  | ||||
|   // NB: This method reports *purely* whether or not a user requested | ||||
|   // that CuDNN be enabled; it doesn't actually say anything about | ||||
|   // whether or not CuDNN is actually usable. Use cudnn_is_acceptable | ||||
|   // to test that instead. | ||||
|   bool userEnabledCuDNN() const; | ||||
|   void setUserEnabledCuDNN(bool e); | ||||
|   bool benchmarkCuDNN() const; | ||||
|   void setBenchmarkCuDNN(bool); | ||||
|   bool deterministicCuDNN() const; | ||||
|   void setDeterministicCuDNN(bool); | ||||
|   std::unique_ptr<Generator> | ||||
|     generator_registry[static_cast<int>(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)]; | ||||
| private: | ||||
|   void initCUDAIfNeeded(DeviceType p) { | ||||
|     if (p == DeviceType::CUDA) { | ||||
|       lazyInitCUDA(); | ||||
|     } | ||||
|   } | ||||
|   void initComplexIfNeeded(ScalarType s) { | ||||
|     if (isComplexType(s)) { | ||||
|       lazyInitComplex(); | ||||
|     } | ||||
|   } | ||||
|   std::once_flag thc_init; | ||||
|   std::once_flag complex_init_; | ||||
|   bool enabled_cudnn = true; | ||||
|   bool deterministic_cudnn = false; | ||||
|   bool benchmark_cudnn = false; | ||||
|   std::atomic<size_t> next_id; | ||||
|   std::unique_ptr<THCState, void(*)(THCState*)> thc_state; | ||||
|   friend struct Type; | ||||
| }; | ||||
|  | ||||
| AT_API Context & globalContext(); | ||||
|  | ||||
| static inline void init() { | ||||
|   globalContext(); | ||||
|   if (const char *env_p = std::getenv("OMP_NUM_THREADS")) { | ||||
|     at::set_num_threads(std::stoi(env_p)); | ||||
|   } | ||||
|   if (const char *env_p = std::getenv("MKL_NUM_THREADS")) { | ||||
|     at::set_num_threads(std::stoi(env_p)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| static inline TypeExtendedInterface& getNonVariableType(Backend p, ScalarType s) { | ||||
|   return globalContext().getNonVariableType(p, s); | ||||
| } | ||||
|  | ||||
| static inline TypeExtendedInterface& getNonVariableType(DeviceType p, ScalarType s) { | ||||
|   return globalContext().getNonVariableType(deviceTypeToBackend(p), s); | ||||
| } | ||||
|  | ||||
| AT_API TypeExtendedInterface& getType(TensorOptions options); | ||||
| AT_API TypeExtendedInterface& getType(const TensorImpl*); | ||||
| AT_API TypeExtendedInterface& getType(const Tensor&); | ||||
|  | ||||
| AT_API Allocator* getCPUAllocator(); | ||||
|  | ||||
| static inline TypeExtendedInterface& CPU(ScalarType s) { | ||||
|   return getNonVariableType(Backend::CPU, s); | ||||
| } | ||||
|  | ||||
| static inline TypeExtendedInterface& CUDA(ScalarType s) { | ||||
|   return getNonVariableType(Backend::CUDA, s); | ||||
| } | ||||
|  | ||||
| static inline bool hasCUDA() { | ||||
|   return globalContext().hasCUDA(); | ||||
| } | ||||
|  | ||||
| static inline bool hasCuDNN() { | ||||
|   return globalContext().hasCuDNN(); | ||||
| } | ||||
|  | ||||
| static inline bool hasMKL() { | ||||
|   return globalContext().hasMKL(); | ||||
| } | ||||
|  | ||||
| static inline bool hasLAPACK() { | ||||
|   return globalContext().hasLAPACK(); | ||||
| } | ||||
|  | ||||
| static inline bool hasMAGMA() { | ||||
|   return globalContext().hasMAGMA(); | ||||
| } | ||||
|  | ||||
| static inline int64_t current_device() { | ||||
|   return globalContext().current_device(); | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
| @ -1,180 +0,0 @@ | ||||
| #include "ATen/DLConvertor.h" | ||||
| #include "ATen/Functions.h" | ||||
|  | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
|  | ||||
|  | ||||
| using namespace std; | ||||
| namespace at { | ||||
|  | ||||
| static DLDataType getDLDataType(const Type& type) { | ||||
|   DLDataType dtype; | ||||
|   dtype.lanes = 1; | ||||
|   dtype.bits = type.elementSizeInBytes() * 8; | ||||
|   switch (type.scalarType()) { | ||||
|     case ScalarType::Byte: | ||||
|       dtype.code = DLDataTypeCode::kDLUInt; | ||||
|       break; | ||||
|     case ScalarType::Char: | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Double: | ||||
|       dtype.code = DLDataTypeCode::kDLFloat; | ||||
|       break; | ||||
|     case ScalarType::Float: | ||||
|       dtype.code = DLDataTypeCode::kDLFloat; | ||||
|       break; | ||||
|     case ScalarType::Int: | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Long: | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Short: | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Half: | ||||
|       dtype.code = DLDataTypeCode::kDLFloat; | ||||
|       break; | ||||
|     case ScalarType::ComplexHalf: | ||||
|       throw std::logic_error("ComplexHalf is not supported by dlpack"); | ||||
|     case ScalarType::ComplexFloat: | ||||
|       throw std::logic_error("ComplexFloat is not supported by dlpack"); | ||||
|     case ScalarType::ComplexDouble: | ||||
|       throw std::logic_error("ComplexDouble is not supported by dlpack"); | ||||
|     case ScalarType::Undefined: | ||||
|       throw std::logic_error("Undefined is not a valid ScalarType"); | ||||
|     case ScalarType::NumOptions: | ||||
|       throw std::logic_error("NumOptions is not a valid ScalarType"); | ||||
|   } | ||||
|   return dtype; | ||||
| } | ||||
|  | ||||
|  | ||||
| static DLContext getDLContext(const Type& type, const int64_t& device_id) { | ||||
|   DLContext ctx; | ||||
|   ctx.device_id = device_id; | ||||
|   if (type.is_cuda()) { | ||||
|     ctx.device_type = DLDeviceType::kDLGPU; | ||||
|   } else { | ||||
|     ctx.device_type = DLDeviceType::kDLCPU; | ||||
|   } | ||||
|   return ctx; | ||||
| } | ||||
|  | ||||
|  | ||||
| static DeviceType getATenDeviceType(const DLContext& ctx) { | ||||
|   switch (ctx.device_type) { | ||||
|     case DLDeviceType::kDLCPU: | ||||
|       return DeviceType::CPU; | ||||
|     case DLDeviceType::kDLGPU: | ||||
|       return DeviceType::CUDA; | ||||
|     case DLDeviceType::kDLOpenCL: | ||||
|       return DeviceType::OPENCL; | ||||
|     case DLDeviceType::kDLROCM: | ||||
|       return DeviceType::HIP; | ||||
|     default: | ||||
|       throw std::logic_error("Unsupported device_type: " + std::to_string(ctx.device_type)); | ||||
|   } | ||||
|   return DeviceType::CPU; // impossible | ||||
| } | ||||
|  | ||||
|  | ||||
| ScalarType toScalarType(const DLDataType& dtype) { | ||||
|   ScalarType stype; | ||||
|   if (dtype.lanes != 1) throw std::logic_error("ATen does not support lanes != 1"); | ||||
|   switch (dtype.code) { | ||||
|     case DLDataTypeCode::kDLUInt: | ||||
|       switch (dtype.bits) { | ||||
|         case 8: | ||||
|           stype = ScalarType::Byte; | ||||
|           break; | ||||
|         default: | ||||
|           throw std::logic_error("Unsupported kUInt bits " + std::to_string(dtype.bits)); | ||||
|       } | ||||
|       break; | ||||
|     case DLDataTypeCode::kDLInt: | ||||
|       switch (dtype.bits) { | ||||
|         case 8: | ||||
|           stype = ScalarType::Char; | ||||
|           break; | ||||
|         case 16: | ||||
|           stype = ScalarType::Short; | ||||
|           break; | ||||
|         case 32: | ||||
|           stype = ScalarType::Int; | ||||
|           break; | ||||
|         case 64: | ||||
|           stype = ScalarType::Long; | ||||
|           break; | ||||
|         default: | ||||
|           throw std::logic_error("Unsupported kInt bits " + std::to_string(dtype.bits)); | ||||
|       } | ||||
|       break; | ||||
|     case DLDataTypeCode::kDLFloat: | ||||
|       switch (dtype.bits) { | ||||
|         case 16: | ||||
|           stype = ScalarType::Half; | ||||
|           break; | ||||
|         case 32: | ||||
|           stype = ScalarType::Float; | ||||
|           break; | ||||
|         case 64: | ||||
|           stype = ScalarType::Double; | ||||
|           break; | ||||
|         default: | ||||
|           throw std::logic_error("Unsupported kFloat bits " + std::to_string(dtype.bits)); | ||||
|       } | ||||
|       break; | ||||
|     default: | ||||
|       throw std::logic_error("Unsupported code " + std::to_string(dtype.code)); | ||||
|   } | ||||
|   return stype; | ||||
| } | ||||
|  | ||||
| struct ATenDLMTensor { | ||||
|   Tensor handle; | ||||
|   DLManagedTensor tensor; | ||||
| }; | ||||
|  | ||||
| void deleter(DLManagedTensor * arg) { | ||||
|   delete static_cast<ATenDLMTensor*>(arg->manager_ctx); | ||||
| } | ||||
|  | ||||
|  | ||||
| // This function returns a DLManagedTensor* constructed out of an ATen | ||||
| // tensor; the caller is responsible for invoking its deleter when done. | ||||
| DLManagedTensor* toDLPack(const Tensor& src) { | ||||
|   ATenDLMTensor * atDLMTensor(new ATenDLMTensor); | ||||
|   atDLMTensor->handle = src; | ||||
|   atDLMTensor->tensor.manager_ctx = atDLMTensor; | ||||
|   atDLMTensor->tensor.deleter = &deleter; | ||||
|   atDLMTensor->tensor.dl_tensor.data = src.data_ptr(); | ||||
|   int64_t device_id = 0; | ||||
|   if (src.type().is_cuda()) { | ||||
|     device_id = src.get_device(); | ||||
|   } | ||||
|   atDLMTensor->tensor.dl_tensor.ctx = getDLContext(src.type(), device_id); | ||||
|   atDLMTensor->tensor.dl_tensor.ndim = src.dim(); | ||||
|   atDLMTensor->tensor.dl_tensor.dtype = getDLDataType(src.type()); | ||||
|   atDLMTensor->tensor.dl_tensor.shape = const_cast<int64_t*>(src.sizes().data()); | ||||
|   atDLMTensor->tensor.dl_tensor.strides = const_cast<int64_t*>(src.strides().data()); | ||||
|   atDLMTensor->tensor.dl_tensor.byte_offset = 0; | ||||
|   return &(atDLMTensor->tensor); | ||||
| } | ||||
|  | ||||
|  | ||||
| Tensor fromDLPack(const DLManagedTensor* src) { | ||||
|   DeviceType device_type = getATenDeviceType(src->dl_tensor.ctx); | ||||
|   ScalarType stype = toScalarType(src->dl_tensor.dtype); | ||||
|   auto deleter = [src](void * self) { | ||||
|     src->deleter(const_cast<DLManagedTensor*>(src)); | ||||
|   }; | ||||
|   return at::from_blob(src->dl_tensor.data, | ||||
|       IntList(src->dl_tensor.shape, src->dl_tensor.ndim), | ||||
|       IntList(src->dl_tensor.strides, src->dl_tensor.ndim), | ||||
|       deleter, | ||||
|       at::device(device_type).dtype(stype)); | ||||
| } | ||||
| } //namespace at | ||||
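A hedged round-trip sketch (assumes ATen headers; tensor-creation calls are era-appropriate assumptions): the DLPack tensor shares storage with the source, and fromDLPack arranges for the deleter to fire when the wrapping tensor's storage dies:

    #include <ATen/ATen.h>
    #include <ATen/DLConvertor.h>

    int main() {
      at::Tensor t = at::ones({2, 3}, at::kFloat);
      DLManagedTensor* dl = at::toDLPack(t);  // shares t's storage
      at::Tensor u = at::fromDLPack(dl);      // wraps the same data
      // dl->deleter runs when u's storage is released.
    }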
										
											
(File diff suppressed because it is too large.)
@ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Device.h> | ||||
| @ -1,132 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/core/Device.h> | ||||
| #include <ATen/core/ScalarType.h> | ||||
| #include <ATen/Tensor.h> | ||||
| #include <ATen/core/Error.h> | ||||
| #include <ATen/core/optional.h> | ||||
| #include <ATen/detail/CUDAHooksInterface.h> | ||||
|  | ||||
| #include <cstddef> | ||||
|  | ||||
| namespace at { | ||||
| /// RAII guard that sets a certain default GPU index in its constructor, and | ||||
| /// changes it back to the device that was originally active upon destruction. | ||||
| /// | ||||
| /// The index is always reset to the one that was active at the time of | ||||
| /// construction of the guard. Even if you `set_index` after construction, the | ||||
| /// destructor will still reset the index to the one that was active at | ||||
| /// construction time. | ||||
| struct DeviceGuard { | ||||
|   /// Default constructor, does nothing. | ||||
|   DeviceGuard() = default; | ||||
|  | ||||
|   /// Uses the given device's `index()` if it is a CUDA device, else does | ||||
|   /// nothing. | ||||
|   explicit DeviceGuard(Device device) { | ||||
|     if (device.is_cuda()) { | ||||
|       set_index(device.index()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   explicit DeviceGuard(optional<Device> device_opt) { | ||||
|     if (device_opt.has_value() && device_opt.value().is_cuda()) { | ||||
|       set_index(device_opt.value().index()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /// Calls `set_index` with the given index. | ||||
|   explicit DeviceGuard(int32_t index) { | ||||
|     set_index(index); | ||||
|   } | ||||
|  | ||||
|   /// Sets the device to the index on which the given tensor is located. | ||||
|   explicit DeviceGuard(const Tensor& tensor) { | ||||
|     set_index_from(tensor); | ||||
|   } | ||||
|  | ||||
|   /// Sets the device to the index on which the first tensor in the list is | ||||
|   /// located. If the list is empty, does nothing. | ||||
|   explicit DeviceGuard(const TensorList& tensors) { | ||||
|     if (!tensors.empty()) { | ||||
|       set_index_from(tensors.front()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /// Copy is disallowed. | ||||
|   DeviceGuard(const DeviceGuard&) = delete; | ||||
|   DeviceGuard& operator=(const DeviceGuard&) = delete; | ||||
|  | ||||
|   /// Move-constructs this `DeviceGuard` from another `DeviceGuard`. The | ||||
|   /// moved-from `DeviceGuard` is modified such that its destruction has no | ||||
|   /// effect (does not reset the device). | ||||
|   DeviceGuard(DeviceGuard&& other) noexcept { | ||||
|     *this = std::move(other); | ||||
|   } | ||||
|  | ||||
|   /// Move-assigns this `DeviceGuard` from another `DeviceGuard`. The | ||||
|   /// moved-from `DeviceGuard` is modified such that its destruction has no | ||||
|   /// effect (does not reset the device). | ||||
|   DeviceGuard& operator=(DeviceGuard&& other) noexcept { | ||||
|     this->original_index_ = other.original_index_; | ||||
|     this->last_index_ = other.last_index_; | ||||
|     // Set other's original index to the unspecified/default state, so that it | ||||
|     // doesn't also reset the device in its constructor. | ||||
|     other.original_index_ = -1; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   /// Resets the device to the index that was active at construction of the | ||||
|   /// guard. | ||||
|   ~DeviceGuard() { | ||||
|     // original_index_ is -1 only if an index was never actually set. | ||||
|     if (original_index_ != -1) { | ||||
|       // Unchecked because we don't want to throw in the destructor. | ||||
|       detail::DynamicCUDAInterface::unchecked_set_device(original_index_); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /// Sets the device to the given one. | ||||
|   void set_index(int32_t index) { | ||||
|     if (index == -1) { | ||||
|       return; | ||||
|     } | ||||
|     AT_ASSERT(index >= 0); | ||||
|     if (original_index_ == -1) { | ||||
|       int32_t previous_index = -123; | ||||
|       detail::DynamicCUDAInterface::get_device(&previous_index); | ||||
|       original_index_ = previous_index; | ||||
|       if (index != original_index_) { | ||||
|         detail::DynamicCUDAInterface::set_device(index); | ||||
|       } | ||||
|     } else { | ||||
|       detail::DynamicCUDAInterface::set_device(index); | ||||
|     } | ||||
|     last_index_ = index; | ||||
|   } | ||||
|  | ||||
|   /// Calls `set_index` with the `Tensor`'s current device, if it is a CUDA | ||||
|   /// tensor. Does nothing if the `tensor` is not defined. | ||||
|   void set_index_from(const Tensor& tensor) { | ||||
|     if (tensor.defined() && tensor.is_cuda()) { | ||||
|       set_index(tensor.get_device()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /// Returns the device that was set upon construction of the guard. | ||||
|   int32_t original_index() const noexcept { | ||||
|     return original_index_; | ||||
|   } | ||||
|  | ||||
|   /// Returns the last device that was set via `set_index`, if any. | ||||
|   int32_t last_index() const noexcept { | ||||
|     return last_index_; | ||||
|   } | ||||
|  | ||||
|  private: | ||||
|   /// The original device that was active at construction of this object. | ||||
|   int32_t original_index_ = -1; | ||||
|   /// The last index that was set via `set_index`. | ||||
|   int32_t last_index_ = -1; | ||||
| }; | ||||
| } // namespace at | ||||
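A hedged usage sketch (assumes a CUDA-enabled build; not part of this file): the guard switches devices for the scope, and the destructor restores the original one:

    #include <ATen/DeviceGuard.h>

    void work_on(int32_t device_index) {
      at::DeviceGuard guard(device_index);  // switch to device_index
      // ... enqueue work on the now-current device ...
    }  // destructor resets the device that was active before construction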
| @ -1,11 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/core/SmallVector.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| /// A container for sizes or strides | ||||
| using DimVector = SmallVector<int64_t, 5>; | ||||
|  | ||||
| } | ||||
| @ -1,130 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/Type.h> | ||||
| #include <ATen/core/Error.h> | ||||
| #include <ATen/core/Half.h> | ||||
|  | ||||
| #define AT_PRIVATE_CASE_TYPE(enum_type, type, ...) \ | ||||
|   case enum_type: {                                \ | ||||
|     using scalar_t = type;                         \ | ||||
|     return __VA_ARGS__();                          \ | ||||
|   } | ||||
|  | ||||
| #define AT_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...)                           \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__)         \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
| #define AT_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, NAME, ...)                  \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__)       \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
| #define AT_DISPATCH_INTEGRAL_TYPES(TYPE, NAME, ...)                           \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
| #define AT_DISPATCH_ALL_TYPES(TYPE, NAME, ...)                                \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
| #define AT_DISPATCH_ALL_TYPES_AND_HALF(TYPE, NAME, ...)                       \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__)       \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
| #define AT_DISPATCH_COMPLEX_TYPES(TYPE, NAME, ...)                            \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__)      \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
|  | ||||
| #define AT_DISPATCH_ALL_TYPES_AND_COMPLEX(TYPE, NAME, ...)                       \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__)      \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
| #define AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX(TYPE, NAME, ...)                       \ | ||||
|   [&] {                                                                       \ | ||||
|     const at::Type& the_type = TYPE;                                          \ | ||||
|     switch (the_type.scalarType()) {                                          \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Byte, uint8_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Char, int8_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Double, double, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Float, float, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Int, int32_t, __VA_ARGS__)         \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Half, at::Half, __VA_ARGS__)       \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexFloat, std::complex<float>, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::ComplexDouble, std::complex<double>, __VA_ARGS__)      \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");  \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
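A minimal sketch of how these dispatch macros are consumed (the kernel below is hypothetical, not part of this header): the lambda passed as the third argument is instantiated once per case, with `scalar_t` bound to the C++ type selected at runtime.

#include <ATen/ATen.h>

// Hypothetical kernel: fill a tensor with ones, dispatching on its dtype.
// Assumes `self` is a contiguous CPU tensor. Each AT_PRIVATE_CASE_TYPE case
// defines `using scalar_t = <ctype>` before invoking the lambda, so the
// loop body below is compiled once per scalar type in the switch.
at::Tensor fill_ones(at::Tensor self) {
  AT_DISPATCH_ALL_TYPES(self.type(), "fill_ones", [&] {
    scalar_t* data = self.data<scalar_t>();
    for (int64_t i = 0; i < self.numel(); ++i) {
      data[i] = static_cast<scalar_t>(1);
    }
  });
  return self;
}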
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Error.h> | ||||
| @ -1,82 +0,0 @@ | ||||
| #include "ATen/ExpandUtils.h" | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| std::vector<int64_t> infer_size(IntList a, IntList b) { | ||||
|   auto dimsA = a.size(); | ||||
|   auto dimsB = b.size(); | ||||
|   ptrdiff_t ndim = dimsA > dimsB ? dimsA : dimsB; | ||||
|   std::vector<int64_t> expandedSizes(ndim); | ||||
|  | ||||
  for (int64_t i = ndim - 1; i >= 0; --i) {
    int64_t offset = ndim - 1 - i;
    int64_t dimA = static_cast<int64_t>(dimsA) - 1 - offset;
    int64_t dimB = static_cast<int64_t>(dimsB) - 1 - offset;
    int64_t sizeA = (dimA >= 0) ? a[dimA] : 1;
    int64_t sizeB = (dimB >= 0) ? b[dimB] : 1;
|  | ||||
|     AT_CHECK( | ||||
|         sizeA == sizeB || sizeA == 1 || sizeB == 1, | ||||
|         "The size of tensor a (", sizeA, | ||||
|         ") must match the size of tensor b (", sizeB, | ||||
|         ") at non-singleton dimension ", i); | ||||
|  | ||||
    // 1s map to the other size (even 0).
    expandedSizes[i] = sizeA == 1 ? sizeB : sizeA;
|   } | ||||
|  | ||||
|   return expandedSizes; | ||||
| } | ||||
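A worked sketch of the broadcasting rule implemented above: trailing dimensions are aligned, a missing leading dimension is treated as 1, and a size of 1 stretches to the other operand's size.

#include <ATen/ExpandUtils.h>
#include <cassert>
#include <vector>

void infer_size_examples() {
  // {3, 1, 5} vs {4, 5}: the 1 broadcasts against 4; the second operand's
  // missing leading dimension is treated as 1.
  assert((at::infer_size({3, 1, 5}, {4, 5}) == std::vector<int64_t>{3, 4, 5}));
  // 1s map to the other size even when that size is 0.
  assert((at::infer_size({2, 1}, {1, 0}) == std::vector<int64_t>{2, 0}));
  // at::infer_size({2, 3}, {4, 3}) would throw: sizes 2 and 4 conflict at
  // non-singleton dimension 0.
}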
|  | ||||
| std::tuple<std::vector<int64_t>, std::vector<int64_t>> inferExpandGeometry( | ||||
|     IntList tensor_sizes, | ||||
|     IntList tensor_strides, | ||||
|     IntList sizes) { | ||||
|   int64_t ndim = sizes.size(); | ||||
|   int64_t tensor_dim = tensor_sizes.size(); | ||||
|  | ||||
|   if (tensor_dim == 0) { | ||||
|     std::vector<int64_t> expandedStrides(ndim, 0); | ||||
|     return std::tuple<std::vector<int64_t>, std::vector<int64_t>>( | ||||
|         sizes.vec(), expandedStrides); | ||||
|   } | ||||
|   std::vector<int64_t> expandedSizes(ndim); | ||||
|   std::vector<int64_t> expandedStrides(ndim); | ||||
|  | ||||
|   // create a new geometry for the tensors | ||||
|   for (int64_t i = ndim - 1; i >= 0; --i) { | ||||
|     int64_t offset = ndim - 1 - i; | ||||
|     int64_t dim = tensor_dim - 1 - offset; | ||||
|     int64_t size = (dim >= 0) ? tensor_sizes[dim] : 1; | ||||
|     int64_t stride = (dim >= 0) ? tensor_strides[dim] | ||||
|                                 : expandedSizes[i + 1] * expandedStrides[i + 1]; | ||||
|     int64_t targetSize = sizes[i]; | ||||
|     if (targetSize == -1) { | ||||
|       AT_CHECK( | ||||
|           dim >= 0, | ||||
|           "The expanded size of the tensor (", | ||||
|           targetSize, | ||||
|           ") isn't allowed in a leading, non-existing dimension ", | ||||
|           i); | ||||
|       targetSize = size; | ||||
|     } | ||||
|     if (size != targetSize) { | ||||
|       AT_CHECK( | ||||
|           size == 1, | ||||
|           "The expanded size of the tensor (", | ||||
|           targetSize, | ||||
|           ") must match the existing size (", | ||||
|           size, | ||||
|           ") at non-singleton dimension ", | ||||
|           i); | ||||
|       size = targetSize; | ||||
|       stride = 0; | ||||
|     } | ||||
|     expandedSizes[i] = size; | ||||
|     expandedStrides[i] = stride; | ||||
|   } | ||||
|   return std::tuple<std::vector<int64_t>, std::vector<int64_t>>( | ||||
|       expandedSizes, expandedStrides); | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
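A sketch that pins down what inferExpandGeometry returns (values traced through the loop above): broadcast dimensions come back with stride 0, so every index along them aliases the same storage element, which is what makes expand() copy-free.

#include <ATen/ExpandUtils.h>
#include <cassert>
#include <tuple>
#include <vector>

void infer_expand_geometry_example() {
  // Expand a tensor of sizes {3, 1} / strides {1, 1} to sizes {2, 3, 4}:
  //   i = 2: existing size 1 -> target 4, stride forced to 0
  //   i = 1: existing size 3 == target 3, stride 1 kept
  //   i = 0: no existing dim -> target 2, stride forced to 0
  std::vector<int64_t> sizes, strides;
  std::tie(sizes, strides) = at::inferExpandGeometry({3, 1}, {1, 1}, {2, 3, 4});
  assert((sizes == std::vector<int64_t>{2, 3, 4}));
  assert((strides == std::vector<int64_t>{0, 1, 0}));
}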
| @ -1,169 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ATen/Tensor.h" | ||||
| #include "ATen/core/Error.h" | ||||
|  | ||||
| #include <functional> | ||||
| #include <sstream> | ||||
| #include <tuple> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| AT_API std::vector<int64_t> infer_size(IntList a, IntList b); | ||||
| AT_API std::tuple<std::vector<int64_t>, std::vector<int64_t> > inferExpandGeometry( | ||||
|     IntList tensor_sizes, IntList tensor_strides, IntList sizes); | ||||
|  | ||||
| // avoid copy-construction of Tensor by using a reference_wrapper. | ||||
| inline void check_defined(std::initializer_list<std::reference_wrapper<const Tensor>> tensors, const char *api_name) { | ||||
|   for (auto& t : tensors) { | ||||
|     if (!t.get().defined()) { | ||||
|       AT_ERROR(api_name, "(...) called with an undefined Tensor"); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand) { | ||||
|   if (tensor.sizes().equals(to_expand.sizes())) { | ||||
|     return std::make_tuple(to_expand); | ||||
|   } | ||||
|  | ||||
|   return std::make_tuple(to_expand.expand(tensor.sizes(), /*implicit=*/true)); // see [expand implicit] | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand, const char *api_name) { | ||||
|   check_defined({tensor, to_expand}, api_name); | ||||
|   return expand_inplace(tensor, to_expand); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2) { | ||||
|   if (tensor.sizes().equals(to_expand1.sizes()) && tensor.sizes().equals((to_expand2.sizes()))) { | ||||
|     return std::make_tuple(to_expand1, to_expand2); | ||||
|   } | ||||
|  | ||||
|   return std::make_tuple( | ||||
|       to_expand1.expand(tensor.sizes(), /*implicit=*/true), // see [expand implicit] | ||||
|       to_expand2.expand(tensor.sizes(), /*implicit=*/true)); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2, | ||||
|                                                  const char *api_name) { | ||||
|   check_defined({tensor, to_expand1, to_expand2}, api_name); | ||||
|   return expand_inplace(tensor, to_expand1, to_expand2); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, const Tensor &to_expand2) { | ||||
|   if (to_expand1.sizes().equals(to_expand2.sizes())) { | ||||
|     return std::make_tuple(to_expand1, to_expand2); | ||||
|   } | ||||
|  | ||||
|   auto expanded_size = infer_size(to_expand1.sizes(), to_expand2.sizes()); | ||||
|   return std::make_tuple( | ||||
|       to_expand1.expand(expanded_size, /*implicit=*/true), // see [expand implicit] | ||||
|       to_expand2.expand(expanded_size, /*implicit=*/true)); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) { | ||||
|   check_defined({to_expand1, to_expand2}, api_name); | ||||
|   return expand_outplace(to_expand1, to_expand2); | ||||
| } | ||||
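A sketch of the calling pattern a binary operator would use (the function name is hypothetical): expand both operands to a common broadcast shape up front, then run the kernel.

#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <tuple>

at::Tensor my_binary_op(const at::Tensor& a, const at::Tensor& b) {
  at::Tensor a_exp, b_exp;
  std::tie(a_exp, b_exp) = at::expand_outplace(a, b, "my_binary_op");
  // a_exp and b_exp now share the broadcast shape; expanded dimensions have
  // stride 0, so no data was copied.
  return a_exp + b_exp;  // stand-in for the real kernel
}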
|  | ||||
| inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1, | ||||
|                                                           const Tensor &to_expand2, | ||||
|                                                           const Tensor &to_expand3) { | ||||
|   if (to_expand1.sizes().equals(to_expand2.sizes()) && to_expand1.sizes().equals(to_expand3.sizes())) { | ||||
|     return std::make_tuple(to_expand1, to_expand2, to_expand3); | ||||
|   } | ||||
|  | ||||
|   auto expanded_size12 = infer_size(to_expand1.sizes(), to_expand2.sizes()); | ||||
|   auto expanded_size = infer_size(expanded_size12, to_expand3.sizes()); | ||||
|   return std::make_tuple( | ||||
|       to_expand1.expand(expanded_size, /*implicit=*/true), // see [expand implicit] | ||||
|       to_expand2.expand(expanded_size, /*implicit=*/true), | ||||
|       to_expand3.expand(expanded_size, /*implicit=*/true)); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1, | ||||
|                                                           const Tensor &to_expand2, | ||||
|                                                           const Tensor &to_expand3, | ||||
|                                                           const char *api_name) { | ||||
|   check_defined({to_expand1, to_expand2, to_expand3}, api_name); | ||||
|   return expand_outplace(to_expand1, to_expand2, to_expand3); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) { | ||||
|   if(to_expand.sizes().equals(sizes)) { | ||||
|     return std::make_tuple(to_expand); | ||||
|   } | ||||
|  | ||||
|   return std::make_tuple(to_expand.expand(sizes, /*implicit=*/true)); // see [expand implicit] | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes, const char *api_name) { | ||||
|   check_defined({to_expand}, api_name); | ||||
|   return expand_size(to_expand, sizes); | ||||
| } | ||||
|  | ||||
| inline std::vector<Tensor> expand_outplace(TensorList to_expand) { | ||||
|   // expands a list of Tensors; ignores undefined (null) tensors | ||||
|   bool first = true; | ||||
|   std::vector<int64_t> sizes; | ||||
|   for (size_t i = 0; i < to_expand.size(); ++i) { | ||||
|     if (!to_expand[i].defined()) { | ||||
|       continue; | ||||
|     } else if (first) { | ||||
|       sizes = to_expand[i].sizes().vec(); | ||||
|       first = false; | ||||
|     } else { | ||||
|       sizes = infer_size(sizes, to_expand[i].sizes()); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   std::vector<Tensor> result(to_expand.size()); | ||||
|   for (size_t i = 0; i < to_expand.size(); ++i) { | ||||
|     if (!to_expand[i].defined()) { | ||||
|       continue; | ||||
|     } else if (to_expand[i].sizes().equals(sizes)) { | ||||
|       result[i] = to_expand[i]; | ||||
|     } else { | ||||
|       result[i] = to_expand[i].expand(sizes, /*implicit=*/true); // see [expand implicit] | ||||
|     } | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
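A sketch of the list variant in use (names hypothetical); this is the shape advanced-indexing code needs, where some index tensors may be absent.

#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <vector>

std::vector<at::Tensor> broadcast_indices(const at::Tensor& i0,
                                          const at::Tensor& i2) {
  // The middle slot is an undefined (null) tensor; it passes through
  // untouched while the defined tensors are expanded to a common shape.
  std::vector<at::Tensor> indices = {i0, at::Tensor(), i2};
  return at::expand_outplace(indices);
}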
|  | ||||
| // Sums `tensor` repeatedly to produce a tensor of shape `shape`. | ||||
| // Precondition: is_expandable_to(shape, tensor.sizes()) must be true | ||||
| static inline Tensor sum_to(Tensor tensor, IntList shape) { | ||||
|   if (shape.size() == 0) { | ||||
|     return tensor.sum(); | ||||
|   } | ||||
|   Tensor result = tensor; | ||||
|   while (result.dim() > (int64_t)shape.size()) { | ||||
|     result = result.sum(0, false); | ||||
|   } | ||||
|   for (int64_t i = 0; i < result.dim(); ++i) { | ||||
|     if (shape[i] == 1 && result.sizes()[i] > 1) { | ||||
|       result = result.sum(i, true); | ||||
|     } | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| // True if `shape` can be broadcasted to `desired` | ||||
| static inline bool is_expandable_to(IntList shape, IntList desired) { | ||||
  int64_t ndim = shape.size();
  int64_t target_dim = desired.size();
  if (ndim > target_dim) {
    return false;
  }
  for (int64_t i = 0; i < ndim; i++) {
|     int64_t size = shape[ndim - i - 1]; | ||||
|     int64_t target = desired[target_dim - i - 1]; | ||||
|     if (size != target && size != 1) { | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
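A sketch tying the last two helpers together (the wrapper function is hypothetical): sum_to is the inverse of broadcasting, e.g. when reducing a broadcast gradient back to a parameter's shape, and is_expandable_to checks its documented precondition.

#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>

at::Tensor reduce_grad_to(const at::Tensor& grad, at::IntList param_shape) {
  AT_CHECK(at::is_expandable_to(param_shape, grad.sizes()),
           "grad shape is not a broadcast of the parameter shape");
  // e.g. grad sizes {2, 3, 4}, param_shape {3, 1}:
  //   sum away leading dim 0 -> {3, 4}, then keepdim-sum dim 1 -> {3, 1}
  return at::sum_to(grad, param_shape);
}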
|  | ||||
} // namespace at
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Generator.h> | ||||
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Half.h> | ||||
| @ -1,44 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <ATen/optional.h> | ||||
| #include <ATen/ScalarType.h> | ||||
| #include <sstream> | ||||
| #include <vector> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| // Infers the size of a dim with size -1, if it exists. Also checks that new | ||||
| // shape is compatible with the number of elements. | ||||
static inline std::vector<int64_t> infer_size(IntList shape, int64_t numel) {
|   auto res = shape.vec(); | ||||
|   int64_t newsize = 1; | ||||
|   auto infer_dim = at::optional<int64_t>(); | ||||
|   for (int64_t dim = 0, ndim = shape.size(); dim != ndim; dim++) { | ||||
|     if (shape[dim] == -1) { | ||||
|       if (infer_dim) { | ||||
|         throw std::runtime_error("only one dimension can be inferred"); | ||||
|       } | ||||
|       infer_dim = dim; | ||||
|     } else if (shape[dim] >= 0) { | ||||
|       newsize *= shape[dim]; | ||||
|     } else { | ||||
|       AT_ERROR("invalid shape dimension ", shape[dim]); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   if (numel == newsize || (infer_dim && newsize > 0 && numel % newsize == 0)) { | ||||
|     if (infer_dim) { | ||||
|       // we have a degree of freedom here to select the dimension size; follow NumPy semantics | ||||
|       // and just bail. | ||||
|       AT_CHECK(newsize != 0, "cannot reshape tensor of 0 elements into shape ", shape); | ||||
|       res[*infer_dim] = numel / newsize; | ||||
|     } | ||||
|     return res; | ||||
|   } | ||||
|  | ||||
|   std::ostringstream ss; | ||||
|   ss << "shape '" << shape << "' is invalid for input of size " << numel; | ||||
|   throw std::runtime_error(ss.str()); | ||||
| } | ||||
|  | ||||
} // namespace at
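Worked examples of the -1 inference that view()/reshape() rely on (a sketch; the wrapper function is just for illustration):

#include <ATen/InferSize.h>
#include <vector>

std::vector<int64_t> infer_view_shape() {
  // 6 elements with one unknown dim: 6 / 2 = 3, so {2, -1} -> {2, 3}.
  // infer_size({0, -1}, 0) throws ("cannot reshape tensor of 0 elements"):
  //   with 0 elements the inferred dimension would be ambiguous.
  // infer_size({2, 4}, 6) throws: "shape '[2, 4]' is invalid for input of
  //   size 6".
  return at::infer_size({2, -1}, 6);
}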
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Layout.h> | ||||
| @ -1,100 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/Utils.h> | ||||
| #include <ATen/core/ArrayRef.h> | ||||
|  | ||||
| #include <vector> | ||||
|  | ||||
| namespace at { | ||||
  /// MatrixRef - Like an ArrayRef, but with an extra recorded stride so that
|   /// we can easily view it as a multidimensional array. | ||||
|   /// | ||||
|   /// Like ArrayRef, this class does not own the underlying data, it is expected | ||||
|   /// to be used in situations where the data resides in some other buffer. | ||||
|   /// | ||||
|   /// This is intended to be trivially copyable, so it should be passed by | ||||
|   /// value. | ||||
|   /// | ||||
|   /// For now, 2D only (so the copies are actually cheap, without having | ||||
|   /// to write a SmallVector class) and contiguous only (so we can | ||||
|   /// return non-strided ArrayRef on index). | ||||
|   /// | ||||
|   /// P.S. dimension 0 indexes rows, dimension 1 indexes columns | ||||
|   template<typename T> | ||||
|   class MatrixRef { | ||||
|   public: | ||||
|     typedef size_t size_type; | ||||
|  | ||||
|   private: | ||||
|     /// Underlying ArrayRef | ||||
|     ArrayRef<T> arr; | ||||
|  | ||||
|     /// Stride of dim 0 (outer dimension) | ||||
|     size_type stride0; | ||||
|  | ||||
|     // Stride of dim 1 is assumed to be 1 | ||||
|  | ||||
|   public: | ||||
    /// @name Constructors
    /// @{

    /// Construct an empty MatrixRef.
    /*implicit*/ MatrixRef() : arr(nullptr), stride0(0) {}

    /// Construct a MatrixRef from an ArrayRef and outer stride.
    /*implicit*/ MatrixRef(ArrayRef<T> arr, size_type stride0)
      : arr(arr), stride0(stride0) {
        AT_CHECK(arr.size() % stride0 == 0, "MatrixRef: ArrayRef size ", arr.size(), " not divisible by stride ", stride0);
      }

    /// @}
    /// @name Simple Operations
    /// @{
|  | ||||
|     /// empty - Check if the matrix is empty. | ||||
|     bool empty() const { return arr.empty(); } | ||||
|  | ||||
|     const T *data() const { return arr.data(); } | ||||
|  | ||||
    /// size - Get the size of a dimension
|     size_t size(size_t dim) const { | ||||
|       if (dim == 0) { | ||||
|         return arr.size() / stride0; | ||||
|       } else if (dim == 1) { | ||||
|         return stride0; | ||||
|       } else { | ||||
|         AT_CHECK(0, "MatrixRef: out of bounds dimension ", dim, "; expected 0 or 1"); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     size_t numel() const { | ||||
|       return arr.size(); | ||||
|     } | ||||
|  | ||||
|     /// equals - Check for element-wise equality. | ||||
|     bool equals(MatrixRef RHS) const { | ||||
|       return stride0 == RHS.stride0 && arr.equals(RHS.arr); | ||||
|     } | ||||
|  | ||||
|     /// @} | ||||
|     /// @name Operator Overloads | ||||
|     /// @{ | ||||
|     ArrayRef<T> operator[](size_t Index) const { | ||||
|       return arr.slice(Index*stride0, stride0); | ||||
|     } | ||||
|  | ||||
|     /// Disallow accidental assignment from a temporary. | ||||
|     /// | ||||
    /// The declaration here is extra complicated so that "matrixRef = {}"
|     /// continues to select the move assignment operator. | ||||
|     template <typename U> | ||||
|     typename std::enable_if<std::is_same<U, T>::value, MatrixRef<T>>::type & | ||||
|     operator=(U &&Temporary) = delete; | ||||
|  | ||||
|     /// Disallow accidental assignment from a temporary. | ||||
|     /// | ||||
    /// The declaration here is extra complicated so that "matrixRef = {}"
|     /// continues to select the move assignment operator. | ||||
|     template <typename U> | ||||
|     typename std::enable_if<std::is_same<U, T>::value, MatrixRef<T>>::type & | ||||
|     operator=(std::initializer_list<U>) = delete; | ||||
|  | ||||
|   }; | ||||
|  | ||||
| } // end namespace at | ||||
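A sketch of MatrixRef in use: viewing a flat buffer as a 2-D grid without copying. The element count must be a multiple of the outer stride, which the constructor checks.

#include <ATen/MatrixRef.h>
#include <cassert>
#include <vector>

void matrix_ref_demo() {
  std::vector<int> flat = {1, 2, 3, 4, 5, 6};
  at::MatrixRef<int> m(flat, /*stride0=*/3);  // views flat as 2 rows x 3 cols
  assert(m.size(0) == 2 && m.size(1) == 3 && m.numel() == 6);
  at::ArrayRef<int> row = m[1];  // {4, 5, 6}; a view, no copy
  assert(row[2] == 6);
}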
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/OptionsGuard.h> | ||||
| @ -1,70 +0,0 @@ | ||||
| #pragma once | ||||
#include <ATen/ATen.h>
#include <algorithm>
#include <cstddef>
#include <numeric>
|  | ||||
| #ifdef _OPENMP | ||||
| #include <omp.h> | ||||
| #endif | ||||
|  | ||||
| namespace at { | ||||
| namespace internal { | ||||
// This parameter is heuristically chosen to determine the minimum amount of
// work that warrants parallelism. For example, when summing an array, it is
// deemed inefficient to parallelize over arrays shorter than 32768. Further,
// no parallel algorithm (such as parallel_reduce) should split work into
// chunks smaller than GRAIN_SIZE.
| constexpr int64_t GRAIN_SIZE = 32768; | ||||
| } // namespace internal | ||||
|  | ||||
| inline int64_t divup(int64_t x, int64_t y) { | ||||
|   return (x + y - 1) / y; | ||||
| } | ||||
|  | ||||
| template <class F> | ||||
| inline void parallel_for( | ||||
|     const int64_t begin, | ||||
|     const int64_t end, | ||||
|     const int64_t grain_size, | ||||
|     const F& f) { | ||||
| #ifdef _OPENMP | ||||
| #pragma omp parallel if (!omp_in_parallel() && ((end - begin) >= grain_size)) | ||||
|   { | ||||
|     int64_t num_threads = omp_get_num_threads(); | ||||
|     int64_t tid = omp_get_thread_num(); | ||||
|     int64_t chunk_size = divup((end - begin), num_threads); | ||||
|     int64_t begin_tid = begin + tid * chunk_size; | ||||
|     if (begin_tid < end) | ||||
|       f(begin_tid, std::min(end, chunk_size + begin_tid)); | ||||
|   } | ||||
| #else | ||||
|   if (begin < end) { | ||||
|     f(begin, end); | ||||
|   } | ||||
| #endif | ||||
| } | ||||
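A sketch of parallel_for in use (the kernel is hypothetical): each OpenMP thread receives one contiguous [begin, end) sub-range, and ranges shorter than grain_size run serially on the calling thread.

#include <ATen/Parallel.h>

void scale_inplace(float* data, int64_t n, float alpha) {
  at::parallel_for(0, n, at::internal::GRAIN_SIZE,
                   [&](int64_t begin, int64_t end) {
    for (int64_t i = begin; i < end; ++i) {
      data[i] *= alpha;
    }
  });
}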
|  | ||||
| template <class scalar_t, class F, class SF> | ||||
| inline scalar_t parallel_reduce( | ||||
|     const int64_t begin, | ||||
|     const int64_t end, | ||||
|     const int64_t grain_size, | ||||
|     const scalar_t ident, | ||||
|     const F f, | ||||
|     const SF sf) { | ||||
|   if (get_num_threads() == 1) { | ||||
|     return f(begin, end, ident); | ||||
|   } else { | ||||
|     const int64_t num_results = divup((end - begin), grain_size); | ||||
|     std::vector<scalar_t> results(num_results); | ||||
|     scalar_t* results_data = results.data(); | ||||
| #pragma omp parallel for if ((end - begin) >= grain_size) | ||||
|     for (int64_t id = 0; id < num_results; id++) { | ||||
|       int64_t i = begin + id * grain_size; | ||||
|       results_data[id] = f(i, i + std::min(end - i, grain_size), ident); | ||||
|     } | ||||
|     return std::accumulate( | ||||
|         results_data, results_data + results.size(), ident, sf); | ||||
|   } | ||||
| } | ||||
|  | ||||
| } // namespace at | ||||
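A sketch of parallel_reduce in use: `f` folds one chunk serially starting from the identity `ident`, and `sf` combines the per-chunk partial results (so `sf` should be associative; note that a parallel float sum may differ slightly from a serial one across thread counts).

#include <ATen/Parallel.h>

float parallel_sum(const float* data, int64_t n) {
  return at::parallel_reduce(
      0, n, at::internal::GRAIN_SIZE, /*ident=*/0.0f,
      [data](int64_t begin, int64_t end, float acc) {
        for (int64_t i = begin; i < end; ++i) {
          acc += data[i];
        }
        return acc;
      },
      [](float a, float b) { return a + b; });
}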
| @ -1,2 +0,0 @@ | ||||
| #pragma once | ||||
| #include <ATen/core/Registry.h> | ||||
| @ -1,60 +0,0 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <atomic> | ||||
|  | ||||
| #include "ATen/core/ATenGeneral.h" | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
// Base class for refcounted things; allows for collections of generic
// refcounted objects that include tensors.
| struct AT_API Retainable { | ||||
|   Retainable(): refcount(1), weak_refcount(1) {} | ||||
|   void retain() { | ||||
|     ++refcount; | ||||
|   } | ||||
|   void release() { | ||||
|     if(--refcount == 0) { | ||||
|       // If we know that this is the last reference then we can skip | ||||
|       // all the decrements and release_resources(). | ||||
|       if (weak_refcount == 1) { | ||||
|         delete this; | ||||
|       } else { | ||||
|         release_resources(); | ||||
|         weak_release(); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   void weak_retain() { | ||||
|     ++weak_refcount; | ||||
|   } | ||||
|   void weak_release() { | ||||
|     if (--weak_refcount == 0) { | ||||
|       delete this; | ||||
|     } | ||||
|   } | ||||
|   bool weak_lock() { | ||||
|     for (;;) { | ||||
|       auto current_refcount = refcount.load(); | ||||
|       if (current_refcount == 0) return false; | ||||
|       if (refcount.compare_exchange_strong(current_refcount, current_refcount + 1)) break; | ||||
|     } | ||||
|     return true; | ||||
|   } | ||||
|   uint32_t use_count() const { | ||||
|     return refcount.load(); | ||||
|   } | ||||
|   uint32_t weak_use_count() const { | ||||
|     return weak_refcount.load(); | ||||
|   } | ||||
|  | ||||
  virtual void release_resources() {}
|   virtual ~Retainable() {} | ||||
| private: | ||||
|   // INVARIANT: once refcount reaches 0 it can never go up | ||||
|   // INVARIANT: weak_refcount = number of weak references + (refcount > 0 ? 1 : 0) | ||||
|   std::atomic<uint32_t> refcount; | ||||
|   std::atomic<uint32_t> weak_refcount; | ||||
| }; | ||||
|  | ||||
} // namespace at
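A sketch of the intended lifecycle (the subclass is hypothetical). An object is born with one strong reference; each release() pairs with either the constructor or a retain(); weak_lock() promotes a weak reference to a strong one only while the object is still alive.

#include <ATen/Retainable.h>

struct MyHandle : at::Retainable {
  void release_resources() override {
    // Free owned buffers here: this runs when the last strong reference
    // dies while weak references are still outstanding.
  }
};

void retainable_demo() {
  auto* h = new MyHandle();   // refcount == 1, weak_refcount == 1
  h->weak_retain();           // weak_refcount == 2
  if (h->weak_lock()) {       // succeeded: refcount == 2
    h->release();             // undo the promotion: refcount == 1
  }
  h->release();               // refcount == 0: release_resources() runs,
                              // then weak_release() (weak_refcount == 1)
  h->weak_release();          // weak_refcount == 0: delete
}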