mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-25 16:14:55 +08:00 
			
		
		
		
	Compare commits
	
		
			59 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 74044638f7 | |||
| 7f73f1d591 | |||
| ac15471de4 | |||
| 49364eb426 | |||
| bcf2d65446 | |||
| f7a33f1eef | |||
| bd584d52df | |||
| c697af4667 | |||
| 0f3f4ec64c | |||
| 509df600bb | |||
| 187101a88e | |||
| e011d4a16e | |||
| 8ada95e950 | |||
| 21c2481dfe | |||
| 398e8ba182 | |||
| 074b30cdcb | |||
| 319bd5d431 | |||
| 5a20bbd377 | |||
| fa59a9e190 | |||
| 143868c3df | |||
| 964929fcc2 | |||
| cd20ecb472 | |||
| 19d4fd4910 | |||
| a7d187baa4 | |||
| 0541546ac5 | |||
| 369ab73efd | |||
| 9f558e1ee6 | |||
| f0ddfff200 | |||
| 2de184b5a9 | |||
| e0eeddfc78 | |||
| 7727b57d08 | |||
| 9e7dc37f90 | |||
| 227017059f | |||
| aeeccc1486 | |||
| 0b91246cbd | |||
| 0856d6f53c | |||
| 336e0d2874 | |||
| 3b36f2068d | |||
| 6207945564 | |||
| aecae514ab | |||
| 27a2ecb0a5 | |||
| e36fd7b0ba | |||
| 799cb646a6 | |||
| f60c63155a | |||
| 954d9ea466 | |||
| 71185fb2a0 | |||
| a06f26560c | |||
| e4cec279c6 | |||
| b8b50aa909 | |||
| db686de13f | |||
| 288e463693 | |||
| 73783d1048 | |||
| 8891d4eeb1 | |||
| 2085a6f329 | |||
| 3eda9e7da2 | |||
| fb8aa0e98c | |||
| c79b79dadd | |||
| 21acca4528 | |||
| f710757557 | 
| @ -36,6 +36,8 @@ class Conf(object): | ||||
|         # The cpu nightlies are built on the pytorch/manylinux-cuda100 docker image | ||||
|         alt_docker_suffix = self.cuda_version or "100" | ||||
|         docker_distro_suffix = "" if self.pydistro == "conda" else alt_docker_suffix | ||||
|         if self.cuda_version == "101": | ||||
|             return "soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916" | ||||
|         return miniutils.quote("pytorch/" + docker_distro_prefix + "-cuda" + docker_distro_suffix) | ||||
|  | ||||
|     def get_name_prefix(self): | ||||
|  | ||||
| @ -24,11 +24,11 @@ CONFIG_TREE_DATA = [ | ||||
|             ("5", [ | ||||
|                 XImportant("3.6"),  # This is actually the ASAN build | ||||
|             ]), | ||||
|             ("7", [ | ||||
|                 ("3.6", [ | ||||
|                     ("xla", [XImportant(True)]), | ||||
|                 ]), | ||||
|             ]), | ||||
|             # ("7", [ | ||||
|             #     ("3.6", [ | ||||
|             #         ("xla", [XImportant(True)]), | ||||
|             #     ]), | ||||
|             # ]), | ||||
|         ]), | ||||
|         ("cuda", [ | ||||
|             ("9", [ | ||||
|  | ||||
| @ -210,6 +210,7 @@ def instantiate_configs(): | ||||
|             android_abi = fc.find_prop("android_abi") | ||||
|             parms_list_ignored_for_docker_image.append(android_abi) | ||||
|             restrict_phases = ["build"] | ||||
|             fc.props["is_important"] = True | ||||
|  | ||||
|         elif compiler_name: | ||||
|             gcc_version = compiler_name + (fc.find_prop("compiler_version") or "") | ||||
|  | ||||
| @ -307,27 +307,28 @@ jobs: | ||||
|           time docker pull ${DOCKER_IMAGE} >/dev/null | ||||
|           export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) | ||||
|  | ||||
|           # TODO We may want to move the rebase logic to a separate step after checkout | ||||
|           # Rebase to master only if in xenial_py3_6_gcc5_4 case | ||||
|           if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then | ||||
|             echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT" | ||||
|             set -x | ||||
|             git config --global user.email "circleci.ossci@gmail.com" | ||||
|             git config --global user.name "CircleCI" | ||||
|             git config remote.origin.url https://github.com/pytorch/pytorch.git | ||||
|             git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master | ||||
|             git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet | ||||
|             export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master` | ||||
|             echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET} | ||||
|             export GIT_COMMIT=${CIRCLE_SHA1} | ||||
|             echo "GIT_COMMIT: " ${GIT_COMMIT} | ||||
|             git checkout -f ${GIT_COMMIT} | ||||
|             git reset --hard ${GIT_COMMIT} | ||||
|             git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET} | ||||
|             set +x | ||||
|           else | ||||
|             echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT" | ||||
|           fi | ||||
|           # NB: Temporarily disable the rebase logic in v1.4.0, don't merge this change into master | ||||
|           # # TODO We may want to move the rebase logic to a separate step after checkout | ||||
|           # # Rebase to master only if in xenial_py3_6_gcc5_4 case | ||||
|           # if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then | ||||
|           #   echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT" | ||||
|           #   set -x | ||||
|           #   git config --global user.email "circleci.ossci@gmail.com" | ||||
|           #   git config --global user.name "CircleCI" | ||||
|           #   git config remote.origin.url https://github.com/pytorch/pytorch.git | ||||
|           #   git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master | ||||
|           #   git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet | ||||
|           #   export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master` | ||||
|           #   echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET} | ||||
|           #   export GIT_COMMIT=${CIRCLE_SHA1} | ||||
|           #   echo "GIT_COMMIT: " ${GIT_COMMIT} | ||||
|           #   git checkout -f ${GIT_COMMIT} | ||||
|           #   git reset --hard ${GIT_COMMIT} | ||||
|           #   git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET} | ||||
|           #   set +x | ||||
|           # else | ||||
|           #   echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT" | ||||
|           # fi | ||||
|  | ||||
|           git submodule sync && git submodule update -q --init --recursive | ||||
|  | ||||
| @ -1709,20 +1710,6 @@ workflows: | ||||
|           build_environment: "pytorch-linux-xenial-py3-clang5-asan-test" | ||||
|           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan:405" | ||||
|           resource_class: large | ||||
|       - pytorch_linux_build: | ||||
|           name: pytorch_xla_linux_xenial_py3_6_clang7_build | ||||
|           requires: | ||||
|             - setup | ||||
|           build_environment: "pytorch-xla-linux-xenial-py3.6-clang7-build" | ||||
|           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-clang7:405" | ||||
|       - pytorch_linux_test: | ||||
|           name: pytorch_xla_linux_xenial_py3_6_clang7_test | ||||
|           requires: | ||||
|             - setup | ||||
|             - pytorch_xla_linux_xenial_py3_6_clang7_build | ||||
|           build_environment: "pytorch-xla-linux-xenial-py3.6-clang7-test" | ||||
|           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-clang7:405" | ||||
|           resource_class: large | ||||
|       - pytorch_linux_build: | ||||
|           name: pytorch_linux_xenial_cuda9_cudnn7_py3_build | ||||
|           requires: | ||||
| @ -1874,33 +1861,18 @@ workflows: | ||||
|           name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_64_build | ||||
|           requires: | ||||
|             - setup | ||||
|           filters: | ||||
|             branches: | ||||
|               only: | ||||
|                 - master | ||||
|                 - /ci-all\/.*/ | ||||
|           build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64-build" | ||||
|           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405" | ||||
|       - pytorch_linux_build: | ||||
|           name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v7a_build | ||||
|           requires: | ||||
|             - setup | ||||
|           filters: | ||||
|             branches: | ||||
|               only: | ||||
|                 - master | ||||
|                 - /ci-all\/.*/ | ||||
|           build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a-build" | ||||
|           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405" | ||||
|       - pytorch_linux_build: | ||||
|           name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v8a_build | ||||
|           requires: | ||||
|             - setup | ||||
|           filters: | ||||
|             branches: | ||||
|               only: | ||||
|                 - master | ||||
|                 - /ci-all\/.*/ | ||||
|           build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a-build" | ||||
|           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405" | ||||
|       # Warning: indentation here matters! | ||||
| @ -2292,7 +2264,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2305,7 +2277,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2318,7 +2290,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2331,7 +2303,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2344,7 +2316,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2505,7 +2477,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2518,7 +2490,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2531,7 +2503,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2544,7 +2516,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2718,7 +2690,7 @@ workflows: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           libtorch_variant: "shared-with-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2732,7 +2704,7 @@ workflows: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           libtorch_variant: "shared-without-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2746,7 +2718,7 @@ workflows: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           libtorch_variant: "static-with-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -2760,7 +2732,7 @@ workflows: | ||||
|             branches: | ||||
|               only: postnightly | ||||
|           libtorch_variant: "static-without-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - smoke_linux_test: | ||||
| @ -3212,7 +3184,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_manywheel_2_7mu_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "manywheel 2.7mu cu101 devtoolset7" | ||||
| @ -3221,7 +3193,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_manywheel_3_5m_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "manywheel 3.5m cu101 devtoolset7" | ||||
| @ -3230,7 +3202,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_manywheel_3_6m_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "manywheel 3.6m cu101 devtoolset7" | ||||
| @ -3239,7 +3211,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_manywheel_3_7m_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "manywheel 3.7m cu101 devtoolset7" | ||||
| @ -3248,7 +3220,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_conda_2_7_cpu_devtoolset7_nightly_build | ||||
|           build_environment: "conda 2.7 cpu devtoolset7" | ||||
| @ -3365,7 +3337,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_conda_3_5_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "conda 3.5 cu101 devtoolset7" | ||||
| @ -3374,7 +3346,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_conda_3_6_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "conda 3.6 cu101 devtoolset7" | ||||
| @ -3383,7 +3355,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_conda_3_7_cu101_devtoolset7_nightly_build | ||||
|           build_environment: "conda 3.7 cu101 devtoolset7" | ||||
| @ -3392,7 +3364,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_libtorch_2_7m_cpu_devtoolset7_nightly_shared-with-deps_build | ||||
|           build_environment: "libtorch 2.7m cpu devtoolset7" | ||||
| @ -3522,7 +3494,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "shared-with-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_shared-without-deps_build | ||||
|           build_environment: "libtorch 2.7m cu101 devtoolset7" | ||||
| @ -3532,7 +3504,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "shared-without-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_static-with-deps_build | ||||
|           build_environment: "libtorch 2.7m cu101 devtoolset7" | ||||
| @ -3542,7 +3514,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "static-with-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_static-without-deps_build | ||||
|           build_environment: "libtorch 2.7m cu101 devtoolset7" | ||||
| @ -3552,7 +3524,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "static-without-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|       - binary_linux_build: | ||||
|           name: binary_linux_libtorch_2_7m_cpu_gcc5_4_cxx11-abi_nightly_shared-with-deps_build | ||||
|           build_environment: "libtorch 2.7m cpu gcc5.4_cxx11-abi" | ||||
| @ -4056,7 +4028,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4068,7 +4040,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4080,7 +4052,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4092,7 +4064,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4104,7 +4076,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4252,7 +4224,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4264,7 +4236,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4276,7 +4248,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4288,7 +4260,7 @@ workflows: | ||||
|           filters: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           docker_image: "pytorch/conda-cuda" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4449,7 +4421,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "shared-with-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4462,7 +4434,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "shared-without-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4475,7 +4447,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "static-with-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
| @ -4488,7 +4460,7 @@ workflows: | ||||
|             branches: | ||||
|               only: nightly | ||||
|           libtorch_variant: "static-without-deps" | ||||
|           docker_image: "pytorch/manylinux-cuda101" | ||||
|           docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916 | ||||
|           use_cuda_docker_runtime: "1" | ||||
|           resource_class: gpu.medium | ||||
|       - binary_linux_test: | ||||
|  | ||||
| @ -11,6 +11,8 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then | ||||
|   source activate testenv >/dev/null | ||||
| elif [[ "$DESIRED_PYTHON" == 2.7mu ]]; then | ||||
|   export PATH="/opt/python/cp27-cp27mu/bin:\$PATH" | ||||
| elif [[ "$DESIRED_PYTHON" == 3.8m ]]; then | ||||
|   export PATH="/opt/python/cp38-cp38/bin:\$PATH" | ||||
| elif [[ "$PACKAGE_TYPE" != libtorch ]]; then | ||||
|   python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)" | ||||
|   export PATH="/opt/python/cp\$python_nodot-cp\${python_nodot}m/bin:\$PATH" | ||||
|  | ||||
| @ -53,8 +53,10 @@ default_set = set([ | ||||
|     'pytorch-macos-10.13-cuda9.2-cudnn7-py3', | ||||
|     # PyTorch Android | ||||
|     'pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32-build', | ||||
|     'pytorch-linux-xenial-py3-clang5-android-ndk-r19', | ||||
|     # PyTorch Android gradle | ||||
|     'pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32', | ||||
|  | ||||
|     # Pytorch iOS builds | ||||
|     'pytorch-ios-11.2.1-x86_64_build', | ||||
|     'pytorch-ios-11.2.1-arm64_build', | ||||
|  | ||||
| @ -19,27 +19,28 @@ jobs: | ||||
|           time docker pull ${DOCKER_IMAGE} >/dev/null | ||||
|           export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE}) | ||||
|  | ||||
|           # TODO We may want to move the rebase logic to a separate step after checkout | ||||
|           # Rebase to master only if in xenial_py3_6_gcc5_4 case | ||||
|           if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then | ||||
|             echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT" | ||||
|             set -x | ||||
|             git config --global user.email "circleci.ossci@gmail.com" | ||||
|             git config --global user.name "CircleCI" | ||||
|             git config remote.origin.url https://github.com/pytorch/pytorch.git | ||||
|             git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master | ||||
|             git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet | ||||
|             export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master` | ||||
|             echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET} | ||||
|             export GIT_COMMIT=${CIRCLE_SHA1} | ||||
|             echo "GIT_COMMIT: " ${GIT_COMMIT} | ||||
|             git checkout -f ${GIT_COMMIT} | ||||
|             git reset --hard ${GIT_COMMIT} | ||||
|             git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET} | ||||
|             set +x | ||||
|           else | ||||
|             echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT" | ||||
|           fi | ||||
|           # NB: Temporarily disable the rebase logic in v1.4.0, don't merge this change into master | ||||
|           # # TODO We may want to move the rebase logic to a separate step after checkout | ||||
|           # # Rebase to master only if in xenial_py3_6_gcc5_4 case | ||||
|           # if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then | ||||
|           #   echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT" | ||||
|           #   set -x | ||||
|           #   git config --global user.email "circleci.ossci@gmail.com" | ||||
|           #   git config --global user.name "CircleCI" | ||||
|           #   git config remote.origin.url https://github.com/pytorch/pytorch.git | ||||
|           #   git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master | ||||
|           #   git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet | ||||
|           #   export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master` | ||||
|           #   echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET} | ||||
|           #   export GIT_COMMIT=${CIRCLE_SHA1} | ||||
|           #   echo "GIT_COMMIT: " ${GIT_COMMIT} | ||||
|           #   git checkout -f ${GIT_COMMIT} | ||||
|           #   git reset --hard ${GIT_COMMIT} | ||||
|           #   git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET} | ||||
|           #   set +x | ||||
|           # else | ||||
|           #   echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT" | ||||
|           # fi | ||||
|  | ||||
|           git submodule sync && git submodule update -q --init --recursive | ||||
|  | ||||
|  | ||||
							
								
								
									
										8
									
								
								.github/workflows/lint.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/workflows/lint.yml
									
									
									
									
										vendored
									
									
								
							| @ -16,7 +16,7 @@ jobs: | ||||
|           python-version: 3.x | ||||
|           architecture: x64 | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@master | ||||
|         uses: actions/checkout@v1 | ||||
|       - name: Ensure consistent CircleCI YAML config | ||||
|         run: | | ||||
|           pip install -r requirements.txt | ||||
| @ -51,7 +51,7 @@ jobs: | ||||
|           python-version: 3.x | ||||
|           architecture: x64 | ||||
|       - name: Fetch PyTorch | ||||
|         uses: actions/checkout@master | ||||
|         uses: actions/checkout@v1 | ||||
|       - name: Checkout PR tip | ||||
|         run: | | ||||
|           set -eux | ||||
| @ -87,7 +87,7 @@ jobs: | ||||
|           python-version: 2.x | ||||
|           architecture: x64 | ||||
|       - name: Fetch PyTorch | ||||
|         uses: actions/checkout@master | ||||
|         uses: actions/checkout@v1 | ||||
|       - name: Checkout PR tip | ||||
|         run: | | ||||
|           set -eux | ||||
| @ -126,7 +126,7 @@ jobs: | ||||
|           python-version: 3.x | ||||
|           architecture: x64 | ||||
|       - name: Checkout PyTorch | ||||
|         uses: actions/checkout@master | ||||
|         uses: actions/checkout@v1 | ||||
|       - name: Checkout PR tip | ||||
|         run: | | ||||
|           set -eux | ||||
|  | ||||
| @ -64,7 +64,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then | ||||
|   exit 0 | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then | ||||
| # if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then | ||||
|   # Hotfix, use hypothesis 3.44.6 on Ubuntu 14.04 | ||||
|   # See comments on | ||||
|   # https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830 | ||||
| @ -74,9 +74,9 @@ if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then | ||||
|   sudo pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl | ||||
|   sudo pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl | ||||
|   sudo pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl | ||||
| else | ||||
|   pip install --user --no-cache-dir hypothesis==3.59.0 | ||||
| fi | ||||
| # else | ||||
| #   pip install --user --no-cache-dir hypothesis==3.59.0 | ||||
| # fi | ||||
|  | ||||
| # Collect additional tests to run (outside caffe2/python) | ||||
| EXTRA_TESTS=() | ||||
| @ -133,7 +133,7 @@ pip install --user pytest-sugar | ||||
| # torchvision tests # | ||||
| ##################### | ||||
| if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then | ||||
|   pip install -q --user git+https://github.com/pytorch/vision.git | ||||
|   pip install -q --user git+https://github.com/pytorch/vision.git@v0.5.0 | ||||
|   pip install -q --user ninja | ||||
|   # JIT C++ extensions require ninja, so put it into PATH. | ||||
|   export PATH="/var/lib/jenkins/.local/bin:$PATH" | ||||
| @ -141,7 +141,7 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then | ||||
|     # default pip version is too old(9.0.2), unable to support tag `manylinux2010`. | ||||
|     # Fix the pip error: Couldn't find a version that satisfies the requirement | ||||
|     sudo pip install --upgrade pip | ||||
|     pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.0.0.dev1104 | ||||
|     pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.1.0.dev1228 | ||||
|   fi | ||||
|   "$ROOT_DIR/scripts/onnx/test.sh" | ||||
| fi | ||||
|  | ||||
| @ -49,7 +49,7 @@ if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then | ||||
|   export PATH="/var/lib/jenkins/.local/bin:$PATH" | ||||
|  | ||||
|   # TODO: move this to Docker | ||||
|   pip_install --user hypothesis | ||||
|   pip_install --user "hypothesis==4.53.2" | ||||
|  | ||||
|   # TODO: move this to Docker | ||||
|   PYTHON_VERSION=$(python -c 'import platform; print(platform.python_version())'|cut -c1) | ||||
| @ -214,7 +214,7 @@ test_backward_compatibility() { | ||||
|   pushd test/backward_compatibility | ||||
|   python dump_all_function_schemas.py --filename new_schemas.txt | ||||
|   pip_uninstall torch | ||||
|   pip_install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | ||||
|   pip_install torch==1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html | ||||
|   python check_backward_compatibility.py --new-schemas new_schemas.txt | ||||
|   popd | ||||
|   set +x | ||||
|  | ||||
| @ -22,7 +22,7 @@ if NOT "%BUILD_ENVIRONMENT%"=="" ( | ||||
|     :: Numba is pinned to 0.44.0 to avoid https://github.com/numba/numba/issues/4352 | ||||
|     call conda install -y -q python=3.6.7 numpy mkl cffi pyyaml boto3 protobuf numba==0.44.0 | ||||
| ) | ||||
| pip install -q ninja future hypothesis "librosa>=0.6.2" psutil pillow | ||||
| pip install -q ninja future "hypothesis==4.53.2" "librosa>=0.6.2" psutil pillow | ||||
| :: No need to install faulthandler since we only test Python >= 3.6 on Windows | ||||
| :: faulthandler is builtin since Python 3.3 | ||||
|  | ||||
|  | ||||
| @ -413,7 +413,7 @@ public abstract class Tensor { | ||||
|    */ | ||||
|   public long[] getDataAsLongArray() { | ||||
|     throw new IllegalStateException( | ||||
|         "Tensor of type " + getClass().getSimpleName() + " cannot return data as float array."); | ||||
|         "Tensor of type " + getClass().getSimpleName() + " cannot return data as long array."); | ||||
|   } | ||||
|  | ||||
|   /** | ||||
|  | ||||
| @ -50,10 +50,10 @@ struct CAFFE2_API VariableHooksInterface { | ||||
|   virtual const std::string& name(const Tensor&) const = 0; | ||||
| }; | ||||
|  | ||||
| C10_API void SetVariableHooks(VariableHooksInterface* hooks); | ||||
| C10_API VariableHooksInterface* GetVariableHooks(); | ||||
| CAFFE2_API void SetVariableHooks(VariableHooksInterface* hooks); | ||||
| CAFFE2_API VariableHooksInterface* GetVariableHooks(); | ||||
|  | ||||
| struct C10_API VariableHooksRegisterer { | ||||
| struct CAFFE2_API VariableHooksRegisterer { | ||||
|   explicit VariableHooksRegisterer(VariableHooksInterface* hooks) { | ||||
|     SetVariableHooks(hooks); | ||||
|   } | ||||
|  | ||||
| @ -41,7 +41,7 @@ Tensor cosine_embedding_loss(const Tensor& input1, const Tensor& input2, const T | ||||
|   auto denom = (mag_square1 * mag_square2).sqrt_(); | ||||
|   auto cos = prod_sum / denom; | ||||
|  | ||||
|   auto zeros = at::zeros_like(target, LEGACY_CONTIGUOUS_MEMORY_FORMAT); | ||||
|   auto zeros = at::zeros_like(cos, LEGACY_CONTIGUOUS_MEMORY_FORMAT); | ||||
|   auto pos = 1 - cos; | ||||
|   auto neg = (cos - margin).clamp_min_(0); | ||||
|   auto output_pos = at::where(target == 1, pos, zeros); | ||||
| @ -77,8 +77,8 @@ Tensor margin_ranking_loss(const Tensor& input1, const Tensor& input2, const Ten | ||||
| } | ||||
|  | ||||
| Tensor kl_div(const Tensor& input, const Tensor& target, int64_t reduction) { | ||||
|   auto zeros = at::zeros_like(target, LEGACY_CONTIGUOUS_MEMORY_FORMAT); | ||||
|   auto output_pos = target * (at::log(target) - input); | ||||
|   auto zeros = at::zeros_like(output_pos, LEGACY_CONTIGUOUS_MEMORY_FORMAT); | ||||
|   auto output = at::where(target > 0, output_pos, zeros); | ||||
|   return apply_loss_reduction(output, reduction); | ||||
| } | ||||
|  | ||||
| @ -324,13 +324,14 @@ Tensor unflatten(const Tensor& self, int64_t dim, IntArrayRef sizes, DimnameList | ||||
|       "up to the size of dim ", dim, " (", self.names()[dim], ": ", self.size(dim), | ||||
|       ") in Tensor", self.names()); | ||||
|  | ||||
|   int64_t dim_wrap = maybe_wrap_dim(dim, self.dim()); | ||||
|   auto outnames = self.names().vec(); | ||||
|   outnames.erase(outnames.begin() + dim); | ||||
|   outnames.insert(outnames.begin() + dim, names.begin(), names.end()); | ||||
|   outnames.erase(outnames.begin() + dim_wrap); | ||||
|   outnames.insert(outnames.begin() + dim_wrap, names.begin(), names.end()); | ||||
|  | ||||
|   auto new_sizes = self.sizes().vec(); | ||||
|   new_sizes.erase(new_sizes.begin() + dim); | ||||
|   new_sizes.insert(new_sizes.begin() + dim, sizes.begin(), sizes.end()); | ||||
|   new_sizes.erase(new_sizes.begin() + dim_wrap); | ||||
|   new_sizes.insert(new_sizes.begin() + dim_wrap, sizes.begin(), sizes.end()); | ||||
|  | ||||
|   Tensor result; | ||||
|   { | ||||
|  | ||||
| @ -138,5 +138,14 @@ Tensor max_pool3d( | ||||
|       self, kernel_size, stride, padding, dilation, ceil_mode); | ||||
|   return std::get<0>(output_and_indices); | ||||
| } | ||||
|  | ||||
| Tensor _test_optional_float(const Tensor & self, c10::optional<double> scale) { | ||||
|   if (scale.has_value()) { | ||||
|     return at::full({}, scale.value(), self.options()); | ||||
|   } else { | ||||
|     return at::empty({0}, self.options()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| } // namespace native | ||||
| } // namespace at | ||||
|  | ||||
| @ -134,6 +134,7 @@ std::vector<Tensor> where(const Tensor& condition) { | ||||
| } | ||||
|  | ||||
| Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& other) { | ||||
|   TORCH_CHECK(self.dtype() == other.dtype(), "expected scalar type ", self.dtype(), " but found ", other.dtype()); | ||||
|   Tensor ret = at::empty(self.sizes(), self.options()); | ||||
|   AT_DISPATCH_ALL_TYPES_AND_COMPLEX(ret.scalar_type(), "where_cpu", [&] { | ||||
|     where_cpu<scalar_t>(ret, condition, self, other); | ||||
|  | ||||
| @ -42,7 +42,22 @@ static void copy_kernel(TensorIterator& iter, bool non_blocking) { | ||||
|     AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, dtype, "copy_", [&] { | ||||
|       using dest_t = scalar_t; | ||||
|       AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, iter.dtype(1), "copy_", [&] { | ||||
|         cpu_kernel(iter, c10::static_cast_with_inter_type<dest_t, scalar_t>); | ||||
|         // Note (@zasdfgbnm): | ||||
|         // | ||||
|         // The code below can not be simplified as | ||||
|         //    cpu_kernel(iter, c10::static_cast_with_inter_type<dest_t, scalar_t>::apply); | ||||
|         // | ||||
|         // because this would force the compiler to instantiate the inline function and generate a function call in the loop | ||||
|         // instead of inlining it, making all the optimizations like vectorization impossible. | ||||
|         // You can verify this by looking at the symbols of `libtorch_cpu.so`: | ||||
|         // | ||||
|         //    readelf -Ws libtorch_cpu.so | grep static_cast_with_inter_type | ||||
|         // | ||||
|         // If done correctly, the above command should have no output. | ||||
|         // | ||||
|         // See: https://github.com/pytorch/pytorch/issues/31271 | ||||
|         cpu_kernel(iter, [](scalar_t src) -> dest_t { | ||||
|           return c10::static_cast_with_inter_type<dest_t, scalar_t>(src); }); | ||||
|       }); | ||||
|     }); | ||||
|   } | ||||
|  | ||||
| @ -171,8 +171,9 @@ void avg_pool2d_out_cuda_template( | ||||
|  | ||||
|   output.resize_({nbatch, nInputPlane, outputHeight, outputWidth}); | ||||
|  | ||||
|   const int count = safe_downcast<int, int64_t>(output.numel()); | ||||
|   const int num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024); | ||||
|   const int32_t count = safe_downcast<int32_t, int64_t>(output.numel()); | ||||
|   const uint32_t  num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024); | ||||
|   const uint32_t num_blocks = cuda::ATenCeilDiv<uint32_t>(count, num_threads); | ||||
|  | ||||
|   if (divisor_override.has_value()) { | ||||
|     AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), | ||||
| @ -184,7 +185,7 @@ void avg_pool2d_out_cuda_template( | ||||
|         scalar_t *input_data = input.data_ptr<scalar_t>(); | ||||
|  | ||||
|         avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, false, true> | ||||
|             <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|             <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|             count, | ||||
|                 input_data, | ||||
|                 nbatch, | ||||
| @ -209,7 +210,7 @@ void avg_pool2d_out_cuda_template( | ||||
|           scalar_t *input_data = input.data_ptr<scalar_t>(); | ||||
|  | ||||
|           avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, true, false> | ||||
|               <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|               <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|               count, | ||||
|                   input_data, | ||||
|                   nbatch, | ||||
| @ -233,7 +234,7 @@ void avg_pool2d_out_cuda_template( | ||||
|           scalar_t *input_data = input.data_ptr<scalar_t>(); | ||||
|  | ||||
|           avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, false, false> | ||||
|               <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|               <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|               count, | ||||
|                   input_data, | ||||
|                   nbatch, | ||||
| @ -250,9 +251,7 @@ void avg_pool2d_out_cuda_template( | ||||
|   } | ||||
|  | ||||
|  | ||||
|   TORCH_CHECK(cudaGetLastError() == cudaSuccess, | ||||
|      "avg_pool2d_out_cuda_frame failed with error code ", | ||||
|      cudaGetLastError()); | ||||
|   THCudaCheck(cudaGetLastError()); | ||||
|  | ||||
|   if (input.ndimension() == 3) { | ||||
|     output.resize_({nInputPlane, outputHeight, outputWidth}); | ||||
| @ -322,8 +321,9 @@ Tensor& avg_pool2d_backward_out_cuda_template( | ||||
|  | ||||
|   gradInput.resize_as_(input); | ||||
|  | ||||
|   const int count =  safe_downcast<int, int64_t>(input.numel()); | ||||
|   const int num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024); | ||||
|   const int32_t count =  safe_downcast<int32_t, int64_t>(input.numel()); | ||||
|   const uint32_t num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024); | ||||
|   const uint32_t num_blocks = cuda::ATenCeilDiv<uint32_t>(count, num_threads); | ||||
|  | ||||
|   if (divisor_override.has_value()) { | ||||
|     AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), | ||||
| @ -335,7 +335,7 @@ Tensor& avg_pool2d_backward_out_cuda_template( | ||||
|         scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>(); | ||||
|  | ||||
|         avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, false, true> | ||||
|             <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|             <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|             count, | ||||
|                 gradOutput_data, | ||||
|                 nbatch, | ||||
| @ -360,7 +360,7 @@ Tensor& avg_pool2d_backward_out_cuda_template( | ||||
|           scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>(); | ||||
|  | ||||
|           avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, true, false> | ||||
|             <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|             <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|                count, | ||||
|                gradOutput_data, | ||||
|                nbatch, | ||||
| @ -384,7 +384,7 @@ Tensor& avg_pool2d_backward_out_cuda_template( | ||||
|           scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>(); | ||||
|  | ||||
|           avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, false, false> | ||||
|             <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|             <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>( | ||||
|                count, | ||||
|                gradOutput_data, | ||||
|                nbatch, | ||||
| @ -400,9 +400,7 @@ Tensor& avg_pool2d_backward_out_cuda_template( | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   TORCH_CHECK(cudaGetLastError() == cudaSuccess, | ||||
|     "avg_pool2d_backward_out_cuda failed with error code ", | ||||
|     cudaGetLastError()); | ||||
|   THCudaCheck(cudaGetLastError()); | ||||
|  | ||||
|   return gradInput; | ||||
| } | ||||
|  | ||||
| @ -22,16 +22,88 @@ static inline __host__ __device__ T powi(T a, T b) { | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| // SFINAE doesn't work well with NVCC under Windows for math functions like pow and sqrt. | ||||
| // So we need to define the functions with the explicit function signatures. | ||||
| // As for pow, the following signatures are defined as the device function: | ||||
| //   pow(float, int) | ||||
| //   pow(double, int) | ||||
| //   pow(float, float) | ||||
| //   pow(double, double) | ||||
| // As for sqrt, the following signatures are defined as the device function: | ||||
| //   sqrt(float) | ||||
| //   sqrt(double) | ||||
| // As for inverse sqrt, we must define it explicitly in MSVC, otherwise the static cast will be | ||||
| // applied to the result of the inline function, and thus the result is incorrect. | ||||
| //   e.g. if we use 1.0 / sqrt(2) for 2 ^ (-0.5) in MSVC, we get | ||||
| //          int(2 ^ (-0.5)) = int(1.0 / sqrt(2)) = int(1.0 / int(1.414)) = int(1.0 / 1) = 1 | ||||
| //        However, the correct result is  | ||||
| //          int(2 ^ (-0.5)) = int(1.0 / 1.414) = 0 | ||||
| #ifdef _MSC_VER | ||||
| // Functions for pow | ||||
| // pow for at::Half | ||||
| static inline __host__ __device__ at::Half pow_(at::Half base, at::Half exp) { | ||||
|   return static_cast<at::Half>(std::pow(static_cast<float>(base), static_cast<float>(exp))); | ||||
| } | ||||
| // pow (floating, floating/int) | ||||
| template <typename Base_type, typename Exp_type> | ||||
| static inline __host__ __device__ typename std::enable_if<std::is_floating_point<Base_type>::value && (std::is_same<Base_type, Exp_type>::value || std::is_same<Exp_type, int>::value), Base_type>::type | ||||
|   pow_(Base_type base, Exp_type exp) { | ||||
|   return std::pow(base, exp); | ||||
| } | ||||
| // pow (integral, integral) | ||||
| template <typename Base_type, typename Exp_type> | ||||
| static inline __host__ __device__ typename std::enable_if<std::is_integral<Base_type>::value && std::is_same<Base_type, Exp_type>::value, Base_type>::type | ||||
|   pow_(Base_type base, Exp_type exp) { | ||||
|   return powi(base, exp); | ||||
| } | ||||
| // pow (Otherwise) | ||||
| template <typename Base_type, typename Exp_type> | ||||
| static inline __host__ __device__ typename std::enable_if<!std::is_same<Base_type, Exp_type>::value && !std::is_same<Exp_type, int>::value, Base_type>::type | ||||
|   pow_(Base_type base, Exp_type exp) { | ||||
|   return static_cast<Base_type>(std::pow(static_cast<double>(base), static_cast<double>(exp))); | ||||
| } | ||||
| // Functions for sqrt | ||||
| // sqrt (floating) | ||||
| template <typename T> | ||||
| static inline __host__ __device__ T sqrt(T x) { | ||||
| static inline __host__ __device__ typename std::enable_if<std::is_floating_point<T>::value, T>::type sqrt_(T x) { | ||||
|   return std::sqrt(x); | ||||
| } | ||||
| // sqrt (integral) | ||||
| template <typename T> | ||||
| static inline __host__ __device__ typename std::enable_if<!std::is_floating_point<T>::value, T>::type sqrt_(T x) { | ||||
|   return static_cast<T>(std::sqrt(static_cast<double>(x))); | ||||
| } | ||||
| // Function for inverse sqrt | ||||
| // invsqrt (floating) | ||||
| template <typename T> | ||||
| static inline __host__ __device__ typename std::enable_if<std::is_floating_point<T>::value, T>::type invsqrt_(T x) { | ||||
|   return 1.0 / std::sqrt(x); | ||||
| } | ||||
| // invsqrt (integral) | ||||
| template <typename T> | ||||
| static inline __host__ __device__ typename std::enable_if<!std::is_floating_point<T>::value, T>::type invsqrt_(T x) { | ||||
|   return static_cast<T>(1.0 / std::sqrt(static_cast<double>(x))); | ||||
| } | ||||
| #else | ||||
| template <typename Base_type, typename Exp_type> | ||||
| static inline __host__ __device__ Base_type pow_(Base_type base, Exp_type exp) { | ||||
|   return std::pow(base, exp); | ||||
| } | ||||
| template <typename T> | ||||
| static inline __host__ __device__ T sqrt_(T x) { | ||||
|   return ::sqrt(x); | ||||
| } | ||||
| template <typename T> | ||||
| static inline __host__ __device__ T invsqrt_(T x) { | ||||
|   return 1.0 / ::sqrt(x); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| void pow_tensor_tensor_kernel(TensorIterator& iter) { | ||||
|   if (isFloatingType(iter.dtype())) { | ||||
|     AT_DISPATCH_FLOATING_TYPES_AND_HALF(iter.dtype(), "pow_cuda", [&]() { | ||||
|       gpu_kernel(iter, []GPU_LAMBDA(scalar_t base, scalar_t exp) -> scalar_t { | ||||
|         return std::pow(base, exp); | ||||
|         return pow_(base, exp); | ||||
|       }); | ||||
|     }); | ||||
|   } else { | ||||
| @ -49,7 +121,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter, | ||||
|   const auto d_exp = static_cast<double>(exp); | ||||
|   if (d_exp == 0.5) { | ||||
|     gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type { | ||||
|       return ::sqrt(base); | ||||
|       return sqrt_(base); | ||||
|     }); | ||||
|   } else if (d_exp == 2) { | ||||
|     gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type { | ||||
| @ -61,7 +133,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter, | ||||
|     }); | ||||
|   } else if (d_exp == -0.5) { | ||||
|     gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type { | ||||
|       return 1.0 / ::sqrt(base); | ||||
|       return invsqrt_(base); | ||||
|     }); | ||||
|   } else if (d_exp == -1) { | ||||
|     gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type { | ||||
| @ -73,7 +145,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter, | ||||
|     }); | ||||
|   } else { | ||||
|     gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type { | ||||
|       return std::pow(base, exp); | ||||
|       return pow_(base, exp); | ||||
|     }); | ||||
|   } | ||||
| } | ||||
|  | ||||
| @ -52,7 +52,7 @@ Tensor& linspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step | ||||
|   } else if (steps == 1) { | ||||
|     r.fill_(start); | ||||
|   } else { | ||||
|     AT_DISPATCH_FLOATING_TYPES(r.scalar_type(), "linspace_cuda", [&]() { | ||||
|     AT_DISPATCH_FLOATING_TYPES_AND_HALF(r.scalar_type(), "linspace_cuda", [&]() { | ||||
|       scalar_t scalar_start = start.to<scalar_t>(); | ||||
|       scalar_t scalar_end = end.to<scalar_t>(); | ||||
|       scalar_t step = (scalar_end - scalar_start) / static_cast<scalar_t>(steps - 1); | ||||
| @ -84,7 +84,7 @@ Tensor& logspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step | ||||
|   } else if (steps == 1) { | ||||
|     r.fill_(std::pow(base, start.to<double>())); | ||||
|   } else { | ||||
|     AT_DISPATCH_FLOATING_TYPES(r.scalar_type(), "logspace_cuda", [&]() { | ||||
|     AT_DISPATCH_FLOATING_TYPES_AND_HALF(r.scalar_type(), "logspace_cuda", [&]() { | ||||
|       scalar_t scalar_base = static_cast<scalar_t>(base); | ||||
|       scalar_t scalar_start = start.to<scalar_t>(); | ||||
|       scalar_t scalar_end = end.to<scalar_t>(); | ||||
|  | ||||
| @ -6068,6 +6068,9 @@ | ||||
|     CPU: replication_pad3d_backward_cpu | ||||
|     CUDA: replication_pad3d_backward_cuda | ||||
|  | ||||
| - func: _test_optional_float(Tensor self, *, float? scale=None) -> Tensor | ||||
|   variants: function | ||||
|  | ||||
| - func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, *, Tensor(a!) out) -> Tensor(a!) | ||||
|   python_module: nn | ||||
|   dispatch: | ||||
|  | ||||
| @ -77,6 +77,8 @@ def type_argument_translations(arg): | ||||
|     # Enables float by translating to legacy double. | ||||
|     elif t == 'float': | ||||
|         t = 'double' | ||||
|     elif t == 'float?': | ||||
|         t = 'double?' | ||||
|     # Enables str by translating to legacy std::string. | ||||
|     elif t == 'str': | ||||
|         t = 'std::string' | ||||
|  | ||||
							
								
								
									
										25
									
								
								caffe2/operators/alias_with_name.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								caffe2/operators/alias_with_name.cc
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,25 @@ | ||||
| #include "caffe2/operators/alias_with_name.h" | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
| // Register AliasWithNameOp<CPUContext> as the CPU implementation of AliasWithName. | ||||
| REGISTER_CPU_OPERATOR(AliasWithName, AliasWithNameOp<CPUContext>); | ||||
|  | ||||
| // Schema: exactly one input and one output, in-place allowed for the | ||||
| // (input 0, output 0) pair, output declared identical to the input in | ||||
| // type and shape. The strings below are the user-facing operator docs. | ||||
| OPERATOR_SCHEMA(AliasWithName) | ||||
|     .NumInputs(1) | ||||
|     .NumOutputs(1) | ||||
|     .AllowInplace({{0, 0}}) | ||||
|     .IdenticalTypeAndShape() | ||||
|     .SetDoc(R"DOC( | ||||
| Similar to AliasOp, storing the alias name as operator argument. | ||||
| )DOC") | ||||
|     .Arg("name", "name of the aliasing") | ||||
|     .Arg("is_backward", "whether or not to alias forward or backward") | ||||
|     .Input(0, "input", "Input tensor whose storage will be shared.") | ||||
|     .Output(0, "output", "Tensor of same shape as input, sharing its storage."); | ||||
|  | ||||
| } // namespace caffe2 | ||||
|  | ||||
| // Export the CPU operator to c10 with the schema string given below. | ||||
| C10_EXPORT_CAFFE2_OP_TO_C10_CPU( | ||||
|     AliasWithName, | ||||
|     "_caffe2::AliasWithName(Tensor input, str name, bool is_backward = False) -> (Tensor output)", | ||||
|     caffe2::AliasWithNameOp<caffe2::CPUContext>); | ||||
							
								
								
									
										12
									
								
								caffe2/operators/alias_with_name.cu
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								caffe2/operators/alias_with_name.cu
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | ||||
| #include "caffe2/core/context_gpu.h" | ||||
| #include "caffe2/operators/alias_with_name.h" | ||||
|  | ||||
| namespace caffe2 { | ||||
| /* Register AliasWithNameOp<CUDAContext> as the CUDA implementation of AliasWithName. */ | ||||
| REGISTER_CUDA_OPERATOR(AliasWithName, AliasWithNameOp<CUDAContext>); | ||||
|  | ||||
| } // namespace caffe2 | ||||
| /* Export the CUDA implementation to c10; only the op name and the kernel type are passed here. */ | ||||
| C10_EXPORT_CAFFE2_OP_TO_C10_CUDA( | ||||
|     AliasWithName, | ||||
|     caffe2::AliasWithNameOp<caffe2::CUDAContext>); | ||||
							
								
								
									
										46
									
								
								caffe2/operators/alias_with_name.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								caffe2/operators/alias_with_name.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | ||||
| #ifndef ALIAS_WITH_NAME_OP_H_ | ||||
| #define ALIAS_WITH_NAME_OP_H_ | ||||
|  | ||||
| #include "caffe2/core/context.h" | ||||
| #include "caffe2/core/export_caffe2_op_to_c10.h" | ||||
| #include "caffe2/core/operator.h" | ||||
|  | ||||
| C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(AliasWithName) | ||||
|  | ||||
| namespace caffe2 { | ||||
| /** | ||||
|  * Operator that publishes input 0 as output 0 via `input.Alias()`. | ||||
|  * | ||||
|  * Construction reads two arguments: "name" (required; a CAFFE_ENFORCE in | ||||
|  * the constructor rejects operators without it, so the "invalid_name" | ||||
|  * fallback passed to GetSingleArgument is never kept) and "is_backward" | ||||
|  * (optional, default false). Both values are only stored in members; | ||||
|  * nothing in this class reads them afterwards. | ||||
|  */ | ||||
| template <class Context> | ||||
| class AliasWithNameOp final : public Operator<Context> { | ||||
|  public: | ||||
|   USE_OPERATOR_CONTEXT_FUNCTIONS; | ||||
|   template <class... Args> | ||||
|   explicit AliasWithNameOp(Args&&... args) | ||||
|       : Operator<Context>(std::forward<Args>(args)...), | ||||
|         name_(this->template GetSingleArgument<std::string>( | ||||
|             "name", | ||||
|             "invalid_name")), | ||||
|         is_backward_( | ||||
|             this->template GetSingleArgument<bool>("is_backward", false)) { | ||||
|     CAFFE_ENFORCE( | ||||
|         OperatorBase::HasArgument("name"), "You have to specify argument name"); | ||||
|   } | ||||
| /* RunOnDevice: fails with "Tensor is not initialized" if input.numel() is negative, otherwise sets output 0 to an alias of input 0 and returns true. */ | ||||
|   bool RunOnDevice() override { | ||||
|     auto& input = Input(0); | ||||
|     CAFFE_ENFORCE_GE(input.numel(), 0, "Tensor is not initialized"); | ||||
|  | ||||
|     // This doesn't work anymore as this is "newstyle" operator | ||||
|     // OutputTensorAlias(0, input); | ||||
|  | ||||
|     OperatorBase::SetOutputTensor(0, input.Alias()); | ||||
|     return true; | ||||
|   } | ||||
| /* Argument values captured by the constructor; not read anywhere else in this class. */ | ||||
|  protected: | ||||
|   std::string name_; | ||||
|   bool is_backward_; | ||||
| }; | ||||
|  | ||||
| } // namespace caffe2 | ||||
|  | ||||
| #endif // ALIAS_WITH_NAME_OP_H_ | ||||
							
								
								
									
										169
									
								
								caffe2/operators/batch_permutation_op.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										169
									
								
								caffe2/operators/batch_permutation_op.cc
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,169 @@ | ||||
| #include "caffe2/operators/batch_permutation_op.h" | ||||
|  | ||||
| #include <cstring> | ||||
| #include <vector> | ||||
|  | ||||
| #ifdef CAFFE2_USE_MKLDNN | ||||
| #include <caffe2/ideep/operators/operator_fallback_ideep.h> | ||||
| #include <caffe2/ideep/utils/ideep_operator.h> | ||||
| #endif | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
/**
 * Copies N rows of K floats from `src` to `dst`, reordered by `indices`.
 *
 * forwards == true : row n of dst is row indices[n] of src.
 * forwards == false: the index mapping is inverted first, so row indices[i]
 *                    of dst is row i of src. When `indices` is a permutation
 *                    of [0, N), this undoes a forward call made with the
 *                    same `indices`.
 *
 * @param N        number of rows; a non-positive value makes the call a no-op
 * @param K        floats per row; a non-positive value makes the call a no-op
 * @param src      source buffer of N * K floats
 * @param indices  N row indices, each expected to lie in [0, N); they are
 *                 NOT validated here
 * @param dst      destination buffer of N * K floats; must not overlap `src`
 *                 because rows are moved with memcpy
 */
template <bool forwards>
void batch_permutation_loop(
    const int N,
    const int K,
    const float* src,
    const int* indices,
    float* dst) {
  if (N <= 0 || K <= 0) {
    return;
  }
  // Offsets and byte counts are computed in size_t: the products n * K and
  // K * sizeof(float) can exceed the range of a 32-bit int / long.
  const std::size_t row_len = static_cast<std::size_t>(K);
  const std::size_t row_bytes = row_len * sizeof(float);
  if (forwards) {
    // The loop counter stays a signed int so the loop remains a valid
    // OpenMP canonical loop on older OpenMP implementations.
#ifdef _OPENMP
#if (_OPENMP >= 201307)
#pragma omp parallel for simd
#else
#pragma omp parallel for
#endif
#endif
    for (int n = 0; n < N; n++) {
      const std::size_t to = static_cast<std::size_t>(n) * row_len;
      const std::size_t from = static_cast<std::size_t>(indices[n]) * row_len;
      std::memcpy(dst + to, src + from, row_bytes);
    }
  } else {
    // inverse[j] is the position i with indices[i] == j; entries that no
    // index refers to keep their zero initialisation.
    std::vector<int> inverse(N);
    for (int i = 0; i < N; ++i) {
      inverse[indices[i]] = i;
    }
    for (int n = 0; n < N; n++) {
      const std::size_t to = static_cast<std::size_t>(n) * row_len;
      const std::size_t from = static_cast<std::size_t>(inverse[n]) * row_len;
      std::memcpy(dst + to, src + from, row_bytes);
    }
  }
}
|  | ||||
| // CPU forward pass: Y[n, ...] = X[indices[n], ...]. | ||||
| // Input 0 is X, input 1 is a 1-d int tensor with one entry per row of X. | ||||
| // The single output Y has the same shape as X. | ||||
| template <> | ||||
| bool BatchPermutationOp<float, CPUContext>::RunOnDevice() { | ||||
|   auto& X = Input(0); | ||||
|   auto& indices = Input(1); | ||||
|  | ||||
|   CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d"); | ||||
|   CAFFE_ENFORCE( | ||||
|       X.dim32(0) == indices.dim32(0), | ||||
|       "X.dim32(0) must be equal to indices.dim32(0)", | ||||
|       "(", | ||||
|       X.dim32(0), | ||||
|       " vs. ", | ||||
|       indices.dim32(0), | ||||
|       ")"); | ||||
|   CAFFE_ENFORCE_GT(X.dim32(0), 0); | ||||
|  | ||||
|   // batch_permutation_loop indexes X with these values unchecked, so reject | ||||
|   // anything outside [0, N) here instead of reading out of bounds. The CUDA | ||||
|   // kernel asserts the same condition. | ||||
|   const int N = X.dim32(0); | ||||
|   const int* indices_data = indices.data<int>(); | ||||
|   for (int n = 0; n < N; ++n) { | ||||
|     CAFFE_ENFORCE( | ||||
|         indices_data[n] >= 0 && indices_data[n] < N, | ||||
|         "indices[", | ||||
|         n, | ||||
|         "] = ", | ||||
|         indices_data[n], | ||||
|         " is out of range [0, ", | ||||
|         N, | ||||
|         ")"); | ||||
|   } | ||||
|  | ||||
|   auto* Y = Output(0, X.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   batch_permutation_loop<true>( | ||||
|       N, | ||||
|       X.numel() / N, | ||||
|       X.data<float>(), | ||||
|       indices_data, | ||||
|       Y->mutable_data<float>()); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| // CPU backward pass: dX[indices[i], ...] = dY[i, ...]. | ||||
| // Input 0 is the same `indices` tensor the forward op received, input 1 is | ||||
| // dY. The single output dX has the same shape as dY. | ||||
| template <> | ||||
| bool BatchPermutationGradientOp<float, CPUContext>::RunOnDevice() { | ||||
|   auto& indices = Input(0); | ||||
|   auto& dY = Input(1); | ||||
|  | ||||
|   // Mirror the forward op's shape checks; the loop below trusts them. | ||||
|   CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d"); | ||||
|   CAFFE_ENFORCE( | ||||
|       dY.dim32(0) == indices.dim32(0), | ||||
|       "dY.dim32(0) must be equal to indices.dim32(0)", | ||||
|       "(", | ||||
|       dY.dim32(0), | ||||
|       " vs. ", | ||||
|       indices.dim32(0), | ||||
|       ")"); | ||||
|   CAFFE_ENFORCE_GT(dY.dim32(0), 0); | ||||
|  | ||||
|   // The inverse table in batch_permutation_loop is written at | ||||
|   // backward_indices[indices[i]], so an out-of-range value would write out | ||||
|   // of bounds. Reject it up front. | ||||
|   const int N = dY.dim32(0); | ||||
|   const int* indices_data = indices.data<int>(); | ||||
|   for (int n = 0; n < N; ++n) { | ||||
|     CAFFE_ENFORCE( | ||||
|         indices_data[n] >= 0 && indices_data[n] < N, | ||||
|         "indices[", | ||||
|         n, | ||||
|         "] = ", | ||||
|         indices_data[n], | ||||
|         " is out of range [0, ", | ||||
|         N, | ||||
|         ")"); | ||||
|   } | ||||
|  | ||||
|   auto* dX = Output(0, dY.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   batch_permutation_loop<false>( | ||||
|       N, | ||||
|       dY.numel() / N, | ||||
|       dY.data<float>(), | ||||
|       indices_data, | ||||
|       dX->mutable_data<float>()); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| // With MKLDNN enabled, BatchPermutation is also registered as an IDEEP | ||||
| // operator by wrapping the float CPU implementation in IDEEPFallbackOp. | ||||
| #ifdef CAFFE2_USE_MKLDNN | ||||
| REGISTER_IDEEP_OPERATOR( | ||||
|     BatchPermutation, | ||||
|     IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>); | ||||
| #endif | ||||
|  | ||||
| // CPU registrations for the forward op and its gradient (float only). | ||||
| REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>); | ||||
| REGISTER_CPU_OPERATOR( | ||||
|     BatchPermutationGradient, | ||||
|     BatchPermutationGradientOp<float, CPUContext>); | ||||
|  | ||||
| // Schema for the forward operator. | ||||
| // Input: X, indices; Output: Y | ||||
| OPERATOR_SCHEMA(BatchPermutation) | ||||
|     .NumInputs(2) | ||||
|     .NumOutputs(1) | ||||
|     .SetDoc(R"DOC( | ||||
| Batch permutation of an input tensor X given input indices. First dimension of | ||||
| X equals batch size N. The indices store a permutation of [0, N). | ||||
| The output Y is a tensor of same shape as X, with data re-ordered according to | ||||
| the indices within the batch size. | ||||
|  | ||||
| Example of batch permutation on a 2-D tensor with batch size 4: | ||||
|   X = [ | ||||
|     [1, 5, 2, 3, 4, 6, 0], | ||||
|     [4, 3, 3, 5, 2, 3, 1], | ||||
|     [2, 2, 3, 6, 0, 0, 1], | ||||
|     [0, 0, 1, 1, 2, 2, 3] | ||||
|   ] | ||||
|   indices = [2, 0, 1, 3] | ||||
|   Y = [ | ||||
|     [2, 2, 3, 6, 0, 0, 1], | ||||
|     [1, 5, 2, 3, 4, 6, 0], | ||||
|     [4, 3, 3, 5, 2, 3, 1], | ||||
|     [0, 0, 1, 1, 2, 2, 3] | ||||
|   ] | ||||
|  | ||||
| Example of batch permutation on a 3-D tensor with batch size 4: | ||||
|   X = [ | ||||
|     [[1, 5, 2], [3, 4, 6, 0]], | ||||
|     [[4, 3, 3], [5, 2, 3, 1]], | ||||
|     [[2, 2, 3], [6, 0, 0, 1]], | ||||
|     [[0, 0, 1], [1, 2, 2, 3]] | ||||
|   ] | ||||
|   indices = [2, 0, 1, 3] | ||||
|   Y = [ | ||||
|     [[2, 2, 3], [6, 0, 0, 1]], | ||||
|     [[1, 5, 2], [3, 4, 6, 0]], | ||||
|     [[4, 3, 3], [5, 2, 3, 1]], | ||||
|     [[0, 0, 1], [1, 2, 2, 3]] | ||||
|   ] | ||||
| )DOC") | ||||
|     .Input(0, "X", "Input tensor, where 1st dimension equals batch size") | ||||
|     .Input(1, "indices", "Input indices of batch to permute") | ||||
|     .Output(0, "Y", "Output permuted tensor"); | ||||
| // Gradient schema. Inputs: indices, dY ("gradOutput"); output: dX ("gradInput"). | ||||
| OPERATOR_SCHEMA(BatchPermutationGradient).NumInputs(2).NumOutputs(1); | ||||
|  | ||||
| // Gradient maker for BatchPermutation: emits one BatchPermutationGradient op | ||||
| // that consumes the forward op's second input and the gradient of its output, | ||||
| // and produces the gradient of its first input. | ||||
| class GetBatchPermutationGradient : public GradientMakerBase { | ||||
|   using GradientMakerBase::GradientMakerBase; | ||||
|   vector<OperatorDef> GetGradientDefs() override { | ||||
|     const vector<string> grad_op_inputs{I(1), GO(0)}; | ||||
|     const vector<string> grad_op_outputs{GI(0)}; | ||||
|     return SingleGradientDef( | ||||
|         "BatchPermutationGradient", "", grad_op_inputs, grad_op_outputs); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient); | ||||
|  | ||||
| } // namespace caffe2 | ||||
|  | ||||
| // Alias for the float CPU operator, used as the implementation argument of | ||||
| // the export macro below. | ||||
| using BatchPermutationOpFloatCPU = | ||||
|     caffe2::BatchPermutationOp<float, caffe2::CPUContext>; | ||||
|  | ||||
| // Exports the CPU operator to c10 under the schema string given here: | ||||
| // two tensor arguments (X, indices) and one tensor result. | ||||
| C10_EXPORT_CAFFE2_OP_TO_C10_CPU( | ||||
|     BatchPermutation, | ||||
|     "_caffe2::BatchPermutation(Tensor X, Tensor indices) -> Tensor", | ||||
|     BatchPermutationOpFloatCPU); | ||||
							
								
								
									
										113
									
								
								caffe2/operators/batch_permutation_op.cu
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								caffe2/operators/batch_permutation_op.cu
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,113 @@ | ||||
| #include "caffe2/core/context_gpu.h" | ||||
| #include "caffe2/operators/batch_permutation_op.h" | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
| namespace { | ||||
| // Moves rows of K floats between `src` and `dst` according to `indices`. | ||||
| // Each loop iteration handles one of the N * K elements: | ||||
| // | ||||
| //   forward == true : dst[n, k]          = src[indices[n], k] | ||||
| //   forward == false: dst[indices[n], k] = src[n, k] | ||||
| // | ||||
| // The backward case is aligned with the input tensor (it scatters). An | ||||
| // alternative aligned with the output tensor would, for every output row n, | ||||
| // scan `indices` for the i with indices[i] == n and then gather | ||||
| // dst[index] = src[i * K + k]. | ||||
| template <bool forward> | ||||
| __global__ void BatchPermutationKernel( | ||||
|     int N, | ||||
|     int K, | ||||
|     const float* src, | ||||
|     const int* indices, | ||||
|     float* dst) { | ||||
|   CUDA_1D_KERNEL_LOOP(index, N * K) { | ||||
|     const int col = index % K; | ||||
|     const int row = index / K; | ||||
|     const int mapped_row = indices[row]; | ||||
|     CUDA_KERNEL_ASSERT(mapped_row >= 0); | ||||
|     CUDA_KERNEL_ASSERT(mapped_row < N); | ||||
|     // `forward` is a template parameter, so only one branch is ever taken | ||||
|     // for a given instantiation. | ||||
|     if (forward) { | ||||
|       dst[index] = src[mapped_row * K + col]; | ||||
|     } else { | ||||
|       dst[mapped_row * K + col] = src[index]; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| } // namespace | ||||
|  | ||||
| // CUDA forward pass: validates the inputs, allocates Y with the shape of X | ||||
| // and launches BatchPermutationKernel<true> on the operator's CUDA stream. | ||||
| template <> | ||||
| bool BatchPermutationOp<float, CUDAContext>::RunOnDevice() { | ||||
|   auto& X = Input(0); | ||||
|   auto& indices = Input(1); | ||||
|  | ||||
|   CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d"); | ||||
|   CAFFE_ENFORCE( | ||||
|       X.dim32(0) == indices.dim32(0), | ||||
|       "X.dim32(0) must be equal to indices.dim32(0)", | ||||
|       "(", | ||||
|       X.dim32(0), | ||||
|       " vs. ", | ||||
|       indices.dim32(0), | ||||
|       ")"); | ||||
|  | ||||
|   auto* Y = Output(0, X.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   CAFFE_ENFORCE_GT(X.dim32(0), 0); | ||||
|  | ||||
|   // Kernel arguments: row count, floats per row, then the three buffers. | ||||
|   const int num_rows = X.dim32(0); | ||||
|   const int row_size = X.numel() / X.dim32(0); | ||||
|   const float* X_data = X.data<float>(); | ||||
|   const int* indices_data = indices.data<int>(); | ||||
|   float* Y_data = Y->mutable_data<float>(); | ||||
|  | ||||
|   BatchPermutationKernel<true> | ||||
|       <<<CAFFE_GET_BLOCKS(X.numel()), | ||||
|          CAFFE_CUDA_NUM_THREADS, | ||||
|          0, | ||||
|          context_.cuda_stream()>>>( | ||||
|           num_rows, row_size, X_data, indices_data, Y_data); | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| // CUDA backward pass: allocates dX with the shape of dY and launches | ||||
| // BatchPermutationKernel<false>, which writes dX[indices[n]] = dY[n]. | ||||
| // Input 0 is `indices`, input 1 is dY. | ||||
| template <> | ||||
| bool BatchPermutationGradientOp<float, CUDAContext>::RunOnDevice() { | ||||
|   auto& indices = Input(0); | ||||
|   auto& dY = Input(1); | ||||
|  | ||||
|   // Same shape checks as the forward op: the kernel reads indices[n] for | ||||
|   // every row n of dY, so the two must have matching leading dimensions. | ||||
|   CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d"); | ||||
|   CAFFE_ENFORCE( | ||||
|       dY.dim32(0) == indices.dim32(0), | ||||
|       "dY.dim32(0) must be equal to indices.dim32(0)", | ||||
|       "(", | ||||
|       dY.dim32(0), | ||||
|       " vs. ", | ||||
|       indices.dim32(0), | ||||
|       ")"); | ||||
|  | ||||
|   auto* dX = Output(0, dY.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   CAFFE_ENFORCE_GT(dY.dim32(0), 0); | ||||
|   BatchPermutationKernel<false> | ||||
|       <<<CAFFE_GET_BLOCKS(dY.numel()), | ||||
|          CAFFE_CUDA_NUM_THREADS, | ||||
|          0, | ||||
|          context_.cuda_stream()>>>( | ||||
|           dY.dim32(0), | ||||
|           dY.numel() / dY.dim32(0), | ||||
|           dY.data<float>(), | ||||
|           indices.data<int>(), | ||||
|           dX->mutable_data<float>()); | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| // CUDA registrations for the forward op and its gradient (float only). | ||||
| REGISTER_CUDA_OPERATOR( | ||||
|     BatchPermutation, | ||||
|     BatchPermutationOp<float, CUDAContext>); | ||||
| REGISTER_CUDA_OPERATOR( | ||||
|     BatchPermutationGradient, | ||||
|     BatchPermutationGradientOp<float, CUDAContext>); | ||||
| } // namespace caffe2 | ||||
|  | ||||
| // Alias for the float CUDA operator, used as the implementation argument of | ||||
| // the export macro below. | ||||
| using BatchPermutationOpFloatCUDA = | ||||
|     caffe2::BatchPermutationOp<float, caffe2::CUDAContext>; | ||||
|  | ||||
| // Exports the CUDA implementation of BatchPermutation to c10. | ||||
| C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(BatchPermutation, BatchPermutationOpFloatCUDA); | ||||
							
								
								
									
										37
									
								
								caffe2/operators/batch_permutation_op.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								caffe2/operators/batch_permutation_op.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | ||||
| #ifndef BATCHPERMUTATION_OP_H_ | ||||
| #define BATCHPERMUTATION_OP_H_ | ||||
|  | ||||
| #include "caffe2/core/context.h" | ||||
| #include "caffe2/core/export_caffe2_op_to_c10.h" | ||||
| #include "caffe2/core/logging.h" | ||||
| #include "caffe2/core/operator.h" | ||||
| #include "caffe2/utils/math.h" | ||||
|  | ||||
| C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(BatchPermutation) | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
| // Operator that reorders the rows (first dimension) of its first input | ||||
| // according to the index tensor given as its second input. | ||||
| // RunOnDevice is only declared here; it is defined per (T, Context) | ||||
| // combination as explicit specializations in the .cc and .cu files. | ||||
| template <typename T, class Context> | ||||
| class BatchPermutationOp final : public Operator<Context> { | ||||
|  public: | ||||
|   template <class... Args> | ||||
|   explicit BatchPermutationOp(Args&&... args) | ||||
|       : Operator<Context>(std::forward<Args>(args)...) {} | ||||
|   USE_OPERATOR_CONTEXT_FUNCTIONS; | ||||
|  | ||||
|   // `override` makes the compiler verify this matches the base class virtual. | ||||
|   bool RunOnDevice() override; | ||||
| }; | ||||
|  | ||||
| // Gradient operator for BatchPermutation. Takes the index tensor and the | ||||
| // output gradient and produces the input gradient. | ||||
| // RunOnDevice is only declared here; it is defined per (T, Context) | ||||
| // combination as explicit specializations in the .cc and .cu files. | ||||
| template <typename T, class Context> | ||||
| class BatchPermutationGradientOp final : public Operator<Context> { | ||||
|  public: | ||||
|   BatchPermutationGradientOp(const OperatorDef& def, Workspace* ws) | ||||
|       : Operator<Context>(def, ws) {} | ||||
|   USE_OPERATOR_CONTEXT_FUNCTIONS; | ||||
|  | ||||
|   // `override` makes the compiler verify this matches the base class virtual. | ||||
|   bool RunOnDevice() override; | ||||
| }; | ||||
|  | ||||
| } // namespace caffe2 | ||||
|  | ||||
| #endif // BATCHPERMUTATION_OP_H_ | ||||
							
								
								
									
										269
									
								
								caffe2/operators/batch_permutation_op_gpu_test.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										269
									
								
								caffe2/operators/batch_permutation_op_gpu_test.cc
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,269 @@ | ||||
| #include "caffe2/core/context_gpu.h" | ||||
| #include "caffe2/core/flags.h" | ||||
| #include "caffe2/operators/batch_permutation_op.h" | ||||
| #include "caffe2/utils/eigen_utils.h" | ||||
| #include "caffe2/utils/math.h" | ||||
| #include "gtest/gtest.h" | ||||
|  | ||||
| namespace caffe2 { | ||||
| namespace { | ||||
|  | ||||
| // Add the vector as an input to a Workspace depending on the context of the | ||||
| // workspace | ||||
|  | ||||
| // Creates blob `name` in `ws` as a CPU tensor of the given shape and fills it | ||||
| // with `values`. | ||||
| template <typename T> | ||||
| void AddInputCPU( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<T>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws) { | ||||
|   auto* dst = BlobGetMutableTensor(ws->CreateBlob(name), CPU); | ||||
|   dst->Resize(shape); | ||||
|  | ||||
|   // View both buffers through Eigen maps and copy with a single assignment. | ||||
|   const Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> src( | ||||
|       values.data(), static_cast<int>(values.size())); | ||||
|   EigenVectorMap<T> dst_vec(dst->mutable_data<T>(), dst->numel()); | ||||
|   dst_vec.array() = src; | ||||
| } | ||||
|  | ||||
| // Creates blob `name` in `ws` as a CUDA tensor holding `values`: the data is | ||||
| // first staged in a temporary CPU tensor of the given shape, then copied into | ||||
| // the blob's tensor. | ||||
| template <typename T> | ||||
| void AddInputGPU( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<T>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws) { | ||||
|   Tensor staging(shape, CPU); | ||||
|   const Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> src( | ||||
|       values.data(), static_cast<int>(values.size())); | ||||
|   EigenVectorMap<T> staging_vec(staging.mutable_data<T>(), staging.numel()); | ||||
|   staging_vec.array() = src; | ||||
|  | ||||
|   auto* device_tensor = BlobGetMutableTensor(ws->CreateBlob(name), CUDA); | ||||
|   device_tensor->CopyFrom(staging); | ||||
| } | ||||
|  | ||||
| // Overload 4 different signatures for AddInput because clang does not allow | ||||
| // template <typename T> | ||||
| // void AddInput<CPUContext>(...) {...} | ||||
| // | ||||
| // The primary template is only declared; each (T, Context) pair used by the | ||||
| // tests gets a full specialization that forwards to AddInputCPU or | ||||
| // AddInputGPU. | ||||
|  | ||||
| template <typename T, class Context> | ||||
| void AddInput( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<T>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws); | ||||
|  | ||||
| // int data, CPU tensor. | ||||
| template <> | ||||
| void AddInput<int, CPUContext>( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<int>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws) { | ||||
|   AddInputCPU<int>(shape, values, name, ws); | ||||
| } | ||||
|  | ||||
| // float data, CPU tensor. | ||||
| template <> | ||||
| void AddInput<float, CPUContext>( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<float>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws) { | ||||
|   AddInputCPU<float>(shape, values, name, ws); | ||||
| } | ||||
|  | ||||
| // int data, CUDA tensor. | ||||
| template <> | ||||
| void AddInput<int, CUDAContext>( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<int>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws) { | ||||
|   AddInputGPU<int>(shape, values, name, ws); | ||||
| } | ||||
|  | ||||
| // float data, CUDA tensor. | ||||
| template <> | ||||
| void AddInput<float, CUDAContext>( | ||||
|     const vector<int64_t>& shape, | ||||
|     const vector<float>& values, | ||||
|     const string& name, | ||||
|     Workspace* ws) { | ||||
|   AddInputGPU<float>(shape, values, name, ws); | ||||
| } | ||||
|  | ||||
| // Maps a Context type to the device type stored in an OperatorDef's | ||||
| // device_option: PROTO_CPU by default, PROTO_CUDA for CUDAContext. | ||||
| template <class Context> | ||||
| DeviceTypeProto GetDeviceType() { | ||||
|   return PROTO_CPU; | ||||
| } | ||||
| template <> | ||||
| DeviceTypeProto GetDeviceType<CUDAContext>() { | ||||
|   return PROTO_CUDA; | ||||
| } | ||||
|  | ||||
| // Builds a workspace holding "X" (`features` with the given shape) and | ||||
| // "indices" (N ints), runs a BatchPermutation operator on the device selected | ||||
| // by Context, and copies its output "Y" into `outResult`. | ||||
| // | ||||
| // Fatal (ASSERT) checks are used where continuing would dereference a null | ||||
| // pointer; on such a failure the function returns without touching | ||||
| // `outResult`. | ||||
| template <class Context> | ||||
| void CreateAndRun( | ||||
|     TensorCPU* outResult, | ||||
|     int N, | ||||
|     vector<int64_t>& shape, | ||||
|     vector<float>& features, | ||||
|     vector<int> indices) { | ||||
|   Workspace ws; | ||||
|  | ||||
|   AddInput<float, Context>(shape, features, "X", &ws); | ||||
|   AddInput<int, Context>(vector<int64_t>{N}, indices, "indices", &ws); | ||||
|  | ||||
|   OperatorDef def; | ||||
|   def.set_name("test"); | ||||
|   def.set_type("BatchPermutation"); | ||||
|   def.add_input("X"); | ||||
|   def.add_input("indices"); | ||||
|   def.add_output("Y"); | ||||
|   def.mutable_device_option()->set_device_type(GetDeviceType<Context>()); | ||||
|   unique_ptr<OperatorBase> op = CreateOperator(def, &ws); | ||||
|  | ||||
|   ASSERT_NE(nullptr, op.get()); | ||||
|   EXPECT_TRUE(op->Run()); | ||||
|  | ||||
|   Blob* Y_blob = ws.GetBlob("Y"); | ||||
|   ASSERT_NE(nullptr, Y_blob); | ||||
|  | ||||
|   auto& Y = Y_blob->Get<Tensor>(); | ||||
|   outResult->CopyFrom(Y); | ||||
| } | ||||
|  | ||||
| // Builds a workspace holding "dY" (`features` with the given shape) and | ||||
| // "indices" (N ints), runs a BatchPermutationGradient operator on the device | ||||
| // selected by Context, and copies its output "dX" into `outResult`. | ||||
| // | ||||
| // Fatal (ASSERT) checks are used where continuing would dereference a null | ||||
| // pointer; on such a failure the function returns without touching | ||||
| // `outResult`. | ||||
| template <class Context> | ||||
| void CreateAndRunGradient( | ||||
|     TensorCPU* outResult, | ||||
|     int N, | ||||
|     vector<int64_t>& shape, | ||||
|     vector<float>& features, | ||||
|     vector<int> indices) { | ||||
|   Workspace ws; | ||||
|  | ||||
|   AddInput<float, Context>(shape, features, "dY", &ws); | ||||
|   AddInput<int, Context>(vector<int64_t>{N}, indices, "indices", &ws); | ||||
|  | ||||
|   OperatorDef def; | ||||
|   def.set_name("test"); | ||||
|   def.set_type("BatchPermutationGradient"); | ||||
|   def.add_input("indices"); | ||||
|   def.add_input("dY"); | ||||
|   def.add_output("dX"); | ||||
|   def.mutable_device_option()->set_device_type(GetDeviceType<Context>()); | ||||
|   unique_ptr<OperatorBase> op = CreateOperator(def, &ws); | ||||
|  | ||||
|   ASSERT_NE(nullptr, op.get()); | ||||
|   EXPECT_TRUE(op->Run()); | ||||
|  | ||||
|   Blob* dX_blob = ws.GetBlob("dX"); | ||||
|   ASSERT_NE(nullptr, dX_blob); | ||||
|  | ||||
|   auto& dX = dX_blob->Get<Tensor>(); | ||||
|   outResult->CopyFrom(dX); | ||||
| } | ||||
|  | ||||
| // Runs BatchPermutation and BatchPermutationGradient for the given tensor | ||||
| // shape and batch indices, checks every output element against the expected | ||||
| // row mapping, and, when a CUDA device is available, checks that the GPU | ||||
| // operators produce the same results as the CPU ones. | ||||
| void CheckCPUGPUEqual(vector<int64_t> shape, vector<int> indices) { | ||||
|   // Prepare input data: features are 0, 1, 2, ... so every element is unique | ||||
|   // and a wrong row mapping cannot go unnoticed. | ||||
|   ASSERT_GT(shape.size(), 1u); | ||||
|   const int N = shape[0]; | ||||
|   int input_size = 1; | ||||
|   for (auto k : shape) { | ||||
|     input_size *= k; | ||||
|   } | ||||
|   const int K = input_size / N; | ||||
|   vector<float> features(input_size); | ||||
|   std::iota(features.begin(), features.end(), 0); | ||||
|  | ||||
|   // The operators only move data, so outputs must equal their source | ||||
|   // elements up to float rounding. | ||||
|   const float kTolerance = 1e-4f; | ||||
|  | ||||
|   // Verifies one forward/backward output pair against `features`: | ||||
|   //   forward : Y[i]           == X[indices[i]] | ||||
|   //   backward: dX[indices[i]] == dY[i] | ||||
|   auto check_outputs = [&](const Tensor& y, const Tensor& y_grad) { | ||||
|     const float* y_data = y.data<float>(); | ||||
|     const float* y_grad_data = y_grad.data<float>(); | ||||
|     for (size_t i = 0; i < indices.size(); ++i) { | ||||
|       for (int k = 0; k < K; ++k) { | ||||
|         EXPECT_NEAR( | ||||
|             y_data[i * K + k], features[indices[i] * K + k], kTolerance); | ||||
|         EXPECT_NEAR( | ||||
|             y_grad_data[indices[i] * K + k], | ||||
|             features[i * K + k], | ||||
|             kTolerance); | ||||
|       } | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   // CPU outputs | ||||
|   Tensor y_cpu{CPU}; | ||||
|   Tensor y_cpu_grad{CPU}; | ||||
|  | ||||
|   // CPU BatchPermutation | ||||
|   CreateAndRun<CPUContext>(&y_cpu, N, shape, features, indices); | ||||
|  | ||||
|   // CPU BatchPermutationGradient | ||||
|   CreateAndRunGradient<CPUContext>(&y_cpu_grad, N, shape, features, indices); | ||||
|  | ||||
|   // Check CPU output values | ||||
|   check_outputs(y_cpu, y_cpu_grad); | ||||
|  | ||||
|   if (!caffe2::HasCudaGPU()) { | ||||
|     VLOG(2) << "No CudaGPU found. Skip GPU test." << std::endl; | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // GPU outputs (copied back into CPU tensors for comparison) | ||||
|   Tensor y_gpu{CPU}; | ||||
|   Tensor y_gpu_grad{CPU}; | ||||
|  | ||||
|   // GPU BatchPermutation | ||||
|   CreateAndRun<CUDAContext>(&y_gpu, N, shape, features, indices); | ||||
|  | ||||
|   // Compare CPU and GPU BatchPermutation outputs | ||||
|   EXPECT_EQ(y_cpu.sizes(), y_gpu.sizes()); | ||||
|   ConstEigenVectorMap<float> y_cpu_vec(y_cpu.data<float>(), y_cpu.numel()); | ||||
|   ConstEigenVectorMap<float> y_gpu_vec(y_gpu.data<float>(), y_gpu.numel()); | ||||
|   EXPECT_TRUE(y_cpu_vec.isApprox(y_gpu_vec)); | ||||
|  | ||||
|   // GPU BatchPermutationGradient | ||||
|   CreateAndRunGradient<CUDAContext>(&y_gpu_grad, N, shape, features, indices); | ||||
|  | ||||
|   // Check GPU outputs | ||||
|   check_outputs(y_gpu, y_gpu_grad); | ||||
|  | ||||
|   // Compare CPU and GPU BatchPermutationGradient outputs | ||||
|   EXPECT_EQ(y_cpu_grad.sizes(), y_gpu_grad.sizes()); | ||||
|   ConstEigenVectorMap<float> y_cpu_vec_grad( | ||||
|       y_cpu_grad.data<float>(), y_cpu_grad.numel()); | ||||
|   ConstEigenVectorMap<float> y_gpu_vec_grad( | ||||
|       y_gpu_grad.data<float>(), y_gpu_grad.numel()); | ||||
|   EXPECT_TRUE(y_cpu_vec_grad.isApprox(y_gpu_vec_grad)); | ||||
| } | ||||
|  | ||||
| } // namespace | ||||
|  | ||||
| // Grows a tensor shape one randomly sized dimension at a time (batch size 8, | ||||
| // up to 5 dimensions in total) and runs the CPU/GPU consistency check for | ||||
| // several fixed permutations at every step. | ||||
| TEST(BatchPermutationTest, CHECKCPUGPUEqualGenericDimension) { | ||||
|   auto t0 = std::chrono::high_resolution_clock::now(); | ||||
|   int batch_size = 8; | ||||
|   int max_dimension = 6; | ||||
|   vector<int64_t> shape = vector<int64_t>{batch_size}; | ||||
|  | ||||
|   // The shapes depend on a clock-derived seed; log it so that a failing run | ||||
|   // can be reproduced. | ||||
|   auto seed = std::chrono::system_clock::now().time_since_epoch().count(); | ||||
|   LOG(INFO) << "BatchPermutationTest random seed: " << seed; | ||||
|   std::default_random_engine generator(seed); | ||||
|  | ||||
|   for (int i = 2; i < max_dimension; ++i) { | ||||
|     std::uniform_int_distribution<> dis(1, i); | ||||
|     shape.push_back(dis(generator)); | ||||
|     CheckCPUGPUEqual(shape, vector<int>{0, 1, 2, 3, 4, 5, 6, 7}); | ||||
|     CheckCPUGPUEqual(shape, vector<int>{7, 6, 5, 4, 3, 2, 1, 0}); | ||||
|     CheckCPUGPUEqual(shape, vector<int>{1, 3, 5, 7, 0, 2, 4, 6}); | ||||
|     CheckCPUGPUEqual(shape, vector<int>{4, 5, 6, 7, 0, 1, 2, 3}); | ||||
|     CheckCPUGPUEqual(shape, vector<int>{3, 1, 5, 7, 6, 2, 4, 0}); | ||||
|   } | ||||
|   auto t1 = std::chrono::high_resolution_clock::now(); | ||||
|   double elapsed = | ||||
|       std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count(); | ||||
|   VLOG(2) << "Time elapsed: " << elapsed << " ms" << std::endl; | ||||
| } | ||||
| } // namespace caffe2 | ||||
| @ -136,7 +136,7 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() { | ||||
|     bias_data = Input(BIAS).template data<T>(); | ||||
|   } | ||||
|  | ||||
|   auto f = [&](Tensor* col_buffer) { | ||||
|   auto f = [this, &filter_offset, &bias_data, &X, &buffer_shape, &N, &Xdata, &offset_data, &M, &filter, &output_image_size, &kernel_dim, &Ydata, &input_offset, &offset_offset, &output_offset] (Tensor* col_buffer) { | ||||
|     col_buffer->Resize(buffer_shape); | ||||
|     T* col_buffer_data = col_buffer->template mutable_data<T>(); | ||||
|     // Im2col, followed by gemm. | ||||
|  | ||||
							
								
								
									
										31
									
								
								caffe2/python/operator_test/alias_with_name_test.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								caffe2/python/operator_test/alias_with_name_test.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import caffe2.python.hypothesis_test_util as hu | ||||
| import hypothesis.strategies as st | ||||
| import numpy as np | ||||
| from caffe2.python import core, utils | ||||
| from hypothesis import given | ||||
|  | ||||
|  | ||||
| class TestAliasWithNameOp(hu.HypothesisTestCase): | ||||
|     @given( | ||||
|         shape=st.lists(st.integers(0, 5), min_size=1, max_size=3), | ||||
|         dtype=st.sampled_from([np.float32, np.int64]), | ||||
|         **hu.gcs | ||||
|     ) | ||||
|     def test_alias_with_name_op(self, shape, dtype, dc, gc): | ||||
|         test_input = (100 * np.random.random(shape)).astype(dtype) | ||||
|         test_inputs = [test_input] | ||||
|  | ||||
|         alias_op = core.CreateOperator( | ||||
|             "AliasWithName", | ||||
|             ["input"], | ||||
|             ["output"], | ||||
|             device_option=gc, | ||||
|         ) | ||||
|         alias_op.arg.add().CopyFrom(utils.MakeArgument("name", "whatever_name")) | ||||
|  | ||||
|         def reference_func(x): | ||||
|             return (x,) | ||||
|  | ||||
|         self.assertReferenceChecks(gc, alias_op, test_inputs, reference_func) | ||||
| @ -139,6 +139,7 @@ class TestMomentumSGD(serial.SerializedTestCase): | ||||
|             [grad, m, lr, w, indices], | ||||
|             sparse) | ||||
|  | ||||
|     @unittest.skip("Test is flaky, see https://github.com/pytorch/pytorch/issues/31368") | ||||
|     @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.") | ||||
|     @given(n=st.integers(4, 8), nesterov=st.booleans(), **hu.gcs) | ||||
|     def test_fp16momentum_sgd(self, n, nesterov, gc, dc): | ||||
|  | ||||
| @ -710,6 +710,15 @@ class TorchIntegration(hu.HypothesisTestCase): | ||||
|  | ||||
|         torch.testing.assert_allclose(torch.tensor(expected_output), actual_output) | ||||
|  | ||||
|     def test_alias_with_name_is_in_place(self): | ||||
|         device = "cuda" if workspace.has_cuda_support else "cpu" | ||||
|         x = torch.Tensor([3, 42]).to(device) | ||||
|         y = torch.ops._caffe2.AliasWithName(x, "new_name") | ||||
|         x[1] = 6 | ||||
|         torch.testing.assert_allclose(x, torch.Tensor([3, 6]).to(device)) | ||||
|         # y should also change because y is alias of x | ||||
|         torch.testing.assert_allclose(y, torch.Tensor([3, 6]).to(device)) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|  | ||||
| @ -36,6 +36,7 @@ torch.optim | ||||
|  | ||||
| -  Vincent Quenneville-Belair (`vincentqb <https://github.com/vincentqb>`__) | ||||
| -  Soumith Chintala (`soumith <https://github.com/soumith>`__) | ||||
| -  Wanchao Liang (`wanchaol <https://github.com/wanchaol>`__) | ||||
|  | ||||
| Autograd Engine | ||||
| ~~~~~~~~~~~~~~~ | ||||
| @ -95,6 +96,20 @@ MKLDNN | ||||
| -  Junjie Bai (`bddppq <https://github.com/bddppq>`__) | ||||
| -  Yinghai Lu (`yinghai <https://github.com/yinghai>`__) | ||||
|  | ||||
| Mobile | ||||
| ~~~~~~ | ||||
|  | ||||
| -  David Reiss (`dreiss <https://github.com/dreiss>`__) | ||||
| -  Jiakai Liu (`ljk53 <https://github.com/ljk53>`__) | ||||
|  | ||||
| Quantization | ||||
| ~~~~~~~~~~~~ | ||||
|  | ||||
| -  Raghuraman Krishnamoorthi (`raghuramank100 <https://github.com/raghuramank100>`__) | ||||
| -  Jerry Zhang (`jerryzh168 <https://github.com/jerryzh168>`__) | ||||
| -  Lingyi Liu (`lly-zero-one <https://github.com/lly-zero-one>`__) | ||||
| -  James Reed (`jamesr66a <https://github.com/jamesr66a>`__) | ||||
|  | ||||
| XLA | ||||
| ~~~ | ||||
|  | ||||
| @ -138,6 +153,9 @@ ONNX <-> PyTorch | ||||
| ~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| -  Lu Fang (`houseroad <https://github.com/houseroad>`__) | ||||
| -  Lara Haidar (`lara-hdr <https://github.com/lara-hdr>`__) | ||||
| -  Spandan Tiwari (`spandantiwari <https://github.com/spandantiwari>`__) | ||||
| -  Bowen Bao (`BowenBao <https://github.com/BowenBao>`__) | ||||
|  | ||||
| Windows | ||||
| ~~~~~~~ | ||||
|  | ||||
| @ -46,7 +46,6 @@ extensions = [ | ||||
|     'sphinx.ext.autodoc', | ||||
|     'sphinx.ext.autosummary', | ||||
|     'sphinx.ext.doctest', | ||||
|     'sphinx.ext.intersphinx', | ||||
|     'sphinx.ext.todo', | ||||
|     'sphinx.ext.coverage', | ||||
|     'sphinx.ext.napoleon', | ||||
|  | ||||
| @ -19,11 +19,11 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. | ||||
|    PyTorch on XLA Devices <http://pytorch.org/xla/> | ||||
|     | ||||
| .. toctree:: | ||||
|   :glob: | ||||
|   :maxdepth: 1 | ||||
|   :caption: Community | ||||
|    :maxdepth: 1 | ||||
|    :caption: Language Bindings | ||||
|  | ||||
|   community/* | ||||
|    C++ API <https://pytorch.org/cppdocs/> | ||||
|    Javadoc <https://pytorch.org/javadoc/> | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 1 | ||||
| @ -80,11 +80,11 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. | ||||
|    torchtext <https://pytorch.org/text> | ||||
|     | ||||
| .. toctree:: | ||||
|    :maxdepth: 1 | ||||
|    :caption: Other Languages | ||||
|   :glob: | ||||
|   :maxdepth: 1 | ||||
|   :caption: Community | ||||
|  | ||||
|    C++ API <https://pytorch.org/cppdocs/> | ||||
|    packages | ||||
|   community/* | ||||
|  | ||||
| Indices and tables | ||||
| ================== | ||||
|  | ||||
| @ -314,13 +314,14 @@ The :class:`~torch.distributed.optim.DistributedOptimizer` operates as follows: | ||||
| Simple end to end example | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| Putting it all together, a very simple end to end example using distributed | ||||
| autograd and distributed optimizer is as follows: | ||||
| Putting it all together, the following is a simple end to end example using | ||||
| distributed autograd and the distributed optimizer. If the code is placed into a | ||||
| file called "dist_autograd_simple.py", it can be run with the command | ||||
| :code:`MASTER_ADDR="localhost" MASTER_PORT=29500 python dist_autograd_simple.py`: | ||||
|  | ||||
| .. code:: | ||||
|  | ||||
|   import multiprocessing as mp | ||||
|   from tempfile import NamedTemporaryFile | ||||
|   import torch | ||||
|   import torch.distributed.autograd as dist_autograd | ||||
|   from torch.distributed import rpc | ||||
| @ -330,52 +331,52 @@ autograd and distributed optimizer is as follows: | ||||
|   def random_tensor(): | ||||
|       return torch.rand((3, 3), requires_grad=True) | ||||
|  | ||||
|   def _run_process(self_rank, dst_rank, file_name): | ||||
|       self_name = "worker{}".format(self_rank) | ||||
|   def _run_process(rank, dst_rank, world_size): | ||||
|       name = "worker{}".format(rank) | ||||
|       dst_name = "worker{}".format(dst_rank) | ||||
|  | ||||
|       # Initialize RPC. | ||||
|       rpc.init_rpc( | ||||
|           self_name=self_name, | ||||
|           self_rank=self_rank, | ||||
|           worker_name_to_id={"worker0": 0, "worker1": 1}, | ||||
|           init_method="file://{}".format(file_name), | ||||
|           name=name, | ||||
|           rank=rank, | ||||
|           world_size=world_size | ||||
|       ) | ||||
|  | ||||
|       # Use a distributed autograd context. | ||||
|       with dist_autograd.context() as context_id: | ||||
|          # Forward pass (create references on remote nodes). | ||||
|          rref1 = rpc.remote(dst_name, random_tensor) | ||||
|          rref2 = rpc.remote(dst_name, random_tensor) | ||||
|          loss = rref1.to_here() + rref2.to_here() | ||||
|           # Forward pass (create references on remote nodes). | ||||
|           rref1 = rpc.remote(dst_name, random_tensor) | ||||
|           rref2 = rpc.remote(dst_name, random_tensor) | ||||
|           loss = rref1.to_here() + rref2.to_here() | ||||
|  | ||||
|          # Backward pass (run distributed autograd). | ||||
|          dist_autograd.backward([loss.sum()]) | ||||
|           # Backward pass (run distributed autograd). | ||||
|           dist_autograd.backward([loss.sum()]) | ||||
|  | ||||
|          # Build DistributedOptimizer. | ||||
|          dist_optim = DistributedOptimizer( | ||||
|            optim.SGD, | ||||
|            [rref1, rref2], | ||||
|            lr=0.05, | ||||
|          ) | ||||
|           # Build DistributedOptimizer. | ||||
|           dist_optim = DistributedOptimizer( | ||||
|               optim.SGD, | ||||
|               [rref1, rref2], | ||||
|               lr=0.05, | ||||
|           ) | ||||
|  | ||||
|          # Run the distributed optimizer step. | ||||
|          dist_optim.step() | ||||
|           # Run the distributed optimizer step. | ||||
|           dist_optim.step() | ||||
|  | ||||
|   def run_process(self_rank, dst_rank, file_name): | ||||
|       _run_process(self_rank, dst_rank, file_name) | ||||
|       rpc.wait_all_workers() | ||||
|   def run_process(rank, dst_rank, world_size): | ||||
|       _run_process(rank, dst_rank, world_size) | ||||
|       rpc.shutdown() | ||||
|  | ||||
|   file_name = NamedTemporaryFile().name | ||||
|   processes = [] | ||||
|  | ||||
|   # Run two workers. | ||||
|   for i in range(2): | ||||
|       p = mp.Process(target=run_process, args=(i, (i + 1) % 2, file_name)) | ||||
|   # Run world_size workers. | ||||
|   world_size = 2 | ||||
|   for i in range(world_size): | ||||
|       p = mp.Process(target=run_process, args=(i, (i + 1) % 2, world_size)) | ||||
|       p.start() | ||||
|       processes.append(p) | ||||
|  | ||||
|   for p in processes: | ||||
|       p.join() | ||||
|  | ||||
|  | ||||
| .. _RFC: https://github.com/pytorch/pytorch/issues/23110 | ||||
|  | ||||
| @ -1,67 +0,0 @@ | ||||
| DType | ||||
| ===== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: public enum DType | ||||
|  | ||||
|    Codes representing tensor data types. | ||||
|  | ||||
| Enum Constants | ||||
| -------------- | ||||
| FLOAT32 | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:field:: public static final DType FLOAT32 | ||||
|    :outertype: DType | ||||
|  | ||||
|    Code for dtype torch.float32. \ :java:ref:`Tensor.dtype()`\ | ||||
|  | ||||
| FLOAT64 | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:field:: public static final DType FLOAT64 | ||||
|    :outertype: DType | ||||
|  | ||||
|    Code for dtype torch.float64. \ :java:ref:`Tensor.dtype()`\ | ||||
|  | ||||
| INT32 | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:field:: public static final DType INT32 | ||||
|    :outertype: DType | ||||
|  | ||||
|    Code for dtype torch.int32. \ :java:ref:`Tensor.dtype()`\ | ||||
|  | ||||
| INT64 | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:field:: public static final DType INT64 | ||||
|    :outertype: DType | ||||
|  | ||||
|    Code for dtype torch.int64. \ :java:ref:`Tensor.dtype()`\ | ||||
|  | ||||
| INT8 | ||||
| ^^^^ | ||||
|  | ||||
| .. java:field:: public static final DType INT8 | ||||
|    :outertype: DType | ||||
|  | ||||
|    Code for dtype torch.int8. \ :java:ref:`Tensor.dtype()`\ | ||||
|  | ||||
| UINT8 | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:field:: public static final DType UINT8 | ||||
|    :outertype: DType | ||||
|  | ||||
|    Code for dtype torch.uint8. \ :java:ref:`Tensor.dtype()`\ | ||||
|  | ||||
| Fields | ||||
| ------ | ||||
| jniCode | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:field:: final int jniCode | ||||
|    :outertype: DType | ||||
| @ -1,297 +0,0 @@ | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| .. java:import:: java.util Map | ||||
|  | ||||
| IValue | ||||
| ====== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: public class IValue | ||||
|  | ||||
|    Java representation of a TorchScript value, which is implemented as a tagged union that can be one of the supported types: https://pytorch.org/docs/stable/jit.html#types . | ||||
|  | ||||
|    Calling \ ``toX``\  methods for inappropriate types will throw \ :java:ref:`IllegalStateException`\ . | ||||
|  | ||||
|    \ ``IValue``\  objects are constructed with \ ``IValue.from(value)``\ , \ ``IValue.tupleFrom(value1, value2, ...)``\ , \ ``IValue.listFrom(value1, value2, ...)``\ , or one of the \ ``dict``\  methods, depending on the key type. | ||||
|  | ||||
|    Data is retrieved from \ ``IValue``\  objects with the \ ``toX()``\  methods. Note that \ ``str``\ -type IValues must be extracted with \ :java:ref:`toStr()`\ , rather than \ :java:ref:`toString()`\ . | ||||
|  | ||||
|    \ ``IValue``\  objects may retain references to objects passed into their constructors, and may return references to their internal state from \ ``toX()``\ . | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dictLongKeyFrom | ||||
| ^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue dictLongKeyFrom(Map<Long, IValue> map) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``Dict[int, V]``\ . | ||||
|  | ||||
| dictStringKeyFrom | ||||
| ^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue dictStringKeyFrom(Map<String, IValue> map) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``Dict[str, V]``\ . | ||||
|  | ||||
| from | ||||
| ^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue from(Tensor tensor) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``Tensor``\ . | ||||
|  | ||||
| from | ||||
| ^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue from(boolean value) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``bool``\ . | ||||
|  | ||||
| from | ||||
| ^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue from(long value) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``int``\ . | ||||
|  | ||||
| from | ||||
| ^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue from(double value) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``float``\ . | ||||
|  | ||||
| from | ||||
| ^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue from(String value) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``str``\ . | ||||
|  | ||||
| isBool | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isBool() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isBoolList | ||||
| ^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isBoolList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isDictLongKey | ||||
| ^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isDictLongKey() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isDictStringKey | ||||
| ^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isDictStringKey() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isDouble | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isDouble() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isDoubleList | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isDoubleList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isList | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isLong | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isLong() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isLongList | ||||
| ^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isLongList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isNull | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isNull() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isString() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isTensor | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isTensor() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isTensorList | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isTensorList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| isTuple | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean isTuple() | ||||
|    :outertype: IValue | ||||
|  | ||||
| listFrom | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue listFrom(boolean... list) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``List[bool]``\ . | ||||
|  | ||||
| listFrom | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue listFrom(long... list) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``List[int]``\ . | ||||
|  | ||||
| listFrom | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue listFrom(double... list) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``List[float]``\ . | ||||
|  | ||||
| listFrom | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue listFrom(Tensor... list) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``List[Tensor]``\ . | ||||
|  | ||||
| listFrom | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue listFrom(IValue... array) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``List[T]``\ . All elements must have the same type. | ||||
|  | ||||
| optionalNull | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue optionalNull() | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``Optional``\  that contains no value. | ||||
|  | ||||
| toBool | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean toBool() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toBoolList | ||||
| ^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public boolean[] toBoolList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toDictLongKey | ||||
| ^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public Map<Long, IValue> toDictLongKey() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toDictStringKey | ||||
| ^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public Map<String, IValue> toDictStringKey() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toDouble | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public double toDouble() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toDoubleList | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public double[] toDoubleList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toList | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public IValue[] toList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toLong | ||||
| ^^^^^^ | ||||
|  | ||||
| .. java:method:: public long toLong() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toLongList | ||||
| ^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public long[] toLongList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toStr | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: public String toStr() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toTensor | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public Tensor toTensor() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toTensorList | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public Tensor[] toTensorList() | ||||
|    :outertype: IValue | ||||
|  | ||||
| toTuple | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:method:: public IValue[] toTuple() | ||||
|    :outertype: IValue | ||||
|  | ||||
| tupleFrom | ||||
| ^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IValue tupleFrom(IValue... array) | ||||
|    :outertype: IValue | ||||
|  | ||||
|    Creates a new \ ``IValue``\  of type \ ``Tuple[T0, T1, ...]``\ . | ||||
| @ -1,55 +0,0 @@ | ||||
| .. java:import:: com.facebook.jni HybridData | ||||
|  | ||||
| Module | ||||
| ====== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: public class Module | ||||
|  | ||||
|    Java wrapper for torch::jit::script::Module. | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| destroy | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:method:: public void destroy() | ||||
|    :outertype: Module | ||||
|  | ||||
|    Explicitly destroys the native torch::jit::script::Module. Calling this method is not required, as the native object will be destroyed when this object is garbage-collected. However, the timing of garbage collection is not guaranteed, so proactively calling \ ``destroy``\  can free memory more quickly. See \ :java:ref:`com.facebook.jni.HybridData.resetNative`\ . | ||||
|  | ||||
| forward | ||||
| ^^^^^^^ | ||||
|  | ||||
| .. java:method:: public IValue forward(IValue... inputs) | ||||
|    :outertype: Module | ||||
|  | ||||
|    Runs the 'forward' method of this module with the specified arguments. | ||||
|  | ||||
|    :param inputs: arguments for the TorchScript module's 'forward' method. | ||||
|    :return: return value from the 'forward' method. | ||||
|  | ||||
| load | ||||
| ^^^^ | ||||
|  | ||||
| .. java:method:: public static Module load(String modelPath) | ||||
|    :outertype: Module | ||||
|  | ||||
|    Loads a serialized TorchScript module from the specified path on the disk. | ||||
|  | ||||
|    :param modelPath: path to file that contains the serialized TorchScript module. | ||||
|    :return: new \ :java:ref:`org.pytorch.Module`\  object which owns torch::jit::script::Module. | ||||
|  | ||||
| runMethod | ||||
| ^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public IValue runMethod(String methodName, IValue... inputs) | ||||
|    :outertype: Module | ||||
|  | ||||
|    Runs the specified method of this module with the specified arguments. | ||||
|  | ||||
|    :param methodName: name of the TorchScript method to run. | ||||
|    :param inputs: arguments that will be passed to TorchScript method. | ||||
|    :return: return value from the method. | ||||
| @ -1,60 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor.Tensor_float32 | ||||
| ===================== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: static class Tensor_float32 extends Tensor | ||||
|    :outertype: Tensor | ||||
|  | ||||
| Constructors | ||||
| ------------ | ||||
| Tensor_float32 | ||||
| ^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:constructor::  Tensor_float32(FloatBuffer data, long[] shape) | ||||
|    :outertype: Tensor.Tensor_float32 | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public DType dtype() | ||||
|    :outertype: Tensor.Tensor_float32 | ||||
|  | ||||
| getDataAsFloatArray | ||||
| ^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public float[] getDataAsFloatArray() | ||||
|    :outertype: Tensor.Tensor_float32 | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor.Tensor_float32 | ||||
|  | ||||
| toString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public String toString() | ||||
|    :outertype: Tensor.Tensor_float32 | ||||
| @ -1,52 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor.Tensor_float64 | ||||
| ===================== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: static class Tensor_float64 extends Tensor | ||||
|    :outertype: Tensor | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public DType dtype() | ||||
|    :outertype: Tensor.Tensor_float64 | ||||
|  | ||||
| getDataAsDoubleArray | ||||
| ^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public double[] getDataAsDoubleArray() | ||||
|    :outertype: Tensor.Tensor_float64 | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor.Tensor_float64 | ||||
|  | ||||
| toString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public String toString() | ||||
|    :outertype: Tensor.Tensor_float64 | ||||
| @ -1,52 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor.Tensor_int32 | ||||
| =================== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: static class Tensor_int32 extends Tensor | ||||
|    :outertype: Tensor | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public DType dtype() | ||||
|    :outertype: Tensor.Tensor_int32 | ||||
|  | ||||
| getDataAsIntArray | ||||
| ^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public int[] getDataAsIntArray() | ||||
|    :outertype: Tensor.Tensor_int32 | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor.Tensor_int32 | ||||
|  | ||||
| toString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public String toString() | ||||
|    :outertype: Tensor.Tensor_int32 | ||||
| @ -1,52 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor.Tensor_int64 | ||||
| =================== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: static class Tensor_int64 extends Tensor | ||||
|    :outertype: Tensor | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public DType dtype() | ||||
|    :outertype: Tensor.Tensor_int64 | ||||
|  | ||||
| getDataAsLongArray | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public long[] getDataAsLongArray() | ||||
|    :outertype: Tensor.Tensor_int64 | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor.Tensor_int64 | ||||
|  | ||||
| toString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public String toString() | ||||
|    :outertype: Tensor.Tensor_int64 | ||||
| @ -1,52 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor.Tensor_int8 | ||||
| ================== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: static class Tensor_int8 extends Tensor | ||||
|    :outertype: Tensor | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public DType dtype() | ||||
|    :outertype: Tensor.Tensor_int8 | ||||
|  | ||||
| getDataAsByteArray | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public byte[] getDataAsByteArray() | ||||
|    :outertype: Tensor.Tensor_int8 | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor.Tensor_int8 | ||||
|  | ||||
| toString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public String toString() | ||||
|    :outertype: Tensor.Tensor_int8 | ||||
| @ -1,52 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor.Tensor_uint8 | ||||
| =================== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: static class Tensor_uint8 extends Tensor | ||||
|    :outertype: Tensor | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public DType dtype() | ||||
|    :outertype: Tensor.Tensor_uint8 | ||||
|  | ||||
| getDataAsUnsignedByteArray | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public byte[] getDataAsUnsignedByteArray() | ||||
|    :outertype: Tensor.Tensor_uint8 | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor.Tensor_uint8 | ||||
|  | ||||
| toString | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: @Override public String toString() | ||||
|    :outertype: Tensor.Tensor_uint8 | ||||
| @ -1,315 +0,0 @@ | ||||
| .. java:import:: java.nio Buffer | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio ByteOrder | ||||
|  | ||||
| .. java:import:: java.nio DoubleBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.nio IntBuffer | ||||
|  | ||||
| .. java:import:: java.nio LongBuffer | ||||
|  | ||||
| .. java:import:: java.util Arrays | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| Tensor | ||||
| ====== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: public abstract class Tensor | ||||
|  | ||||
|    Representation of a Tensor. Behavior is similar to PyTorch's tensor objects. | ||||
|  | ||||
|    Most tensors will be constructed as \ ``Tensor.fromBlob(data, shape)``\ , where \ ``data``\  can be an array or a direct \ :java:ref:`Buffer`\  (of the proper subclass). Helper methods are provided to allocate buffers properly. | ||||
|  | ||||
|    To access Tensor data, see \ :java:ref:`dtype()`\ , \ :java:ref:`shape()`\ , and various \ ``getDataAs*``\  methods. | ||||
|  | ||||
|    When constructing \ ``Tensor``\  objects with \ ``data``\  as an array, it is not specified whether this data is copied or retained as a reference, so it is recommended not to modify it after constructing. \ ``data``\  passed as a \ :java:ref:`Buffer`\  is not copied, so it can be modified between \ :java:ref:`Module`\  calls to avoid reallocation. Data retrieved from \ ``Tensor``\  objects may be copied or may be a reference to the \ ``Tensor``\ 's internal data buffer. \ ``shape``\  is always copied. | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| allocateByteBuffer | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static ByteBuffer allocateByteBuffer(int numElements) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Allocates a new direct \ :java:ref:`java.nio.ByteBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(ByteBuffer,long[])`\ , \ :java:ref:`Tensor.fromBlobUnsigned(ByteBuffer,long[])`\ . | ||||
|  | ||||
|    :param numElements: capacity (number of elements) of result buffer. | ||||
|  | ||||
| allocateDoubleBuffer | ||||
| ^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static DoubleBuffer allocateDoubleBuffer(int numElements) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Allocates a new direct \ :java:ref:`java.nio.DoubleBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(DoubleBuffer,long[])`\ . | ||||
|  | ||||
|    :param numElements: capacity (number of elements) of result buffer. | ||||
|  | ||||
| allocateFloatBuffer | ||||
| ^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static FloatBuffer allocateFloatBuffer(int numElements) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Allocates a new direct \ :java:ref:`java.nio.FloatBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(FloatBuffer,long[])`\ . | ||||
|  | ||||
|    :param numElements: capacity (number of elements) of result buffer. | ||||
|  | ||||
| allocateIntBuffer | ||||
| ^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static IntBuffer allocateIntBuffer(int numElements) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Allocates a new direct \ :java:ref:`java.nio.IntBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(IntBuffer,long[])`\ . | ||||
|  | ||||
|    :param numElements: capacity (number of elements) of result buffer. | ||||
|  | ||||
| allocateLongBuffer | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static LongBuffer allocateLongBuffer(int numElements) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Allocates a new direct \ :java:ref:`java.nio.LongBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(LongBuffer,long[])`\ . | ||||
|  | ||||
|    :param numElements: capacity (number of elements) of result buffer. | ||||
|  | ||||
| dtype | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: public abstract DType dtype() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :return: data type of this tensor. | ||||
|  | ||||
| dtypeJniCode | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method::  int dtypeJniCode() | ||||
|    :outertype: Tensor | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(byte[] data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.int8 with specified shape and data as array of bytes. | ||||
|  | ||||
|    :param data: Tensor elements | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(int[] data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.int32 with specified shape and data as array of ints. | ||||
|  | ||||
|    :param data: Tensor elements | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(float[] data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.float32 with specified shape and data as array of floats. | ||||
|  | ||||
|    :param data: Tensor elements | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(long[] data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.int64 with specified shape and data as array of longs. | ||||
|  | ||||
|    :param data: Tensor elements | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(long[] shape, double[] data) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.float64 with specified shape and data as array of doubles. | ||||
|  | ||||
|    :param shape: Tensor shape | ||||
|    :param data: Tensor elements | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(ByteBuffer data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.int8 with specified shape and data. | ||||
|  | ||||
|    :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor. | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(IntBuffer data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.int32 with specified shape and data. | ||||
|  | ||||
|    :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor. | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(FloatBuffer data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.float32 with specified shape and data. | ||||
|  | ||||
|    :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor. | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(LongBuffer data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.int64 with specified shape and data. | ||||
|  | ||||
|    :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor. | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlob | ||||
| ^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlob(DoubleBuffer data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.float64 with specified shape and data. | ||||
|  | ||||
|    :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor. | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlobUnsigned | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlobUnsigned(byte[] data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.uint8 with specified shape and data as array of bytes. | ||||
|  | ||||
|    :param data: Tensor elements | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| fromBlobUnsigned | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor fromBlobUnsigned(ByteBuffer data, long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Creates a new Tensor instance with dtype torch.uint8 with specified shape and data. | ||||
|  | ||||
|    :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor. | ||||
|    :param shape: Tensor shape | ||||
|  | ||||
| getDataAsByteArray | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public byte[] getDataAsByteArray() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :throws IllegalStateException: if it is called for a non-int8 tensor. | ||||
|    :return: a Java byte array that contains the tensor data. This may be a copy or reference. | ||||
|  | ||||
| getDataAsDoubleArray | ||||
| ^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public double[] getDataAsDoubleArray() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :throws IllegalStateException: if it is called for a non-float64 tensor. | ||||
|    :return: a Java double array that contains the tensor data. This may be a copy or reference. | ||||
|  | ||||
| getDataAsFloatArray | ||||
| ^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public float[] getDataAsFloatArray() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :throws IllegalStateException: if it is called for a non-float32 tensor. | ||||
|    :return: a Java float array that contains the tensor data. This may be a copy or reference. | ||||
|  | ||||
| getDataAsIntArray | ||||
| ^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public int[] getDataAsIntArray() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :throws IllegalStateException: if it is called for a non-int32 tensor. | ||||
|    :return: a Java int array that contains the tensor data. This may be a copy or reference. | ||||
|  | ||||
| getDataAsLongArray | ||||
| ^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public long[] getDataAsLongArray() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :throws IllegalStateException: if it is called for a non-int64 tensor. | ||||
|    :return: a Java long array that contains the tensor data. This may be a copy or reference. | ||||
|  | ||||
| getDataAsUnsignedByteArray | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public byte[] getDataAsUnsignedByteArray() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    :throws IllegalStateException: if it is called for a non-uint8 tensor. | ||||
|    :return: a Java byte array that contains the tensor data. This may be a copy or reference. | ||||
|  | ||||
| getRawDataBuffer | ||||
| ^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method::  Buffer getRawDataBuffer() | ||||
|    :outertype: Tensor | ||||
|  | ||||
| numel | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: public long numel() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Returns the number of elements in this tensor. | ||||
|  | ||||
| numel | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: public static long numel(long[] shape) | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Calculates the number of elements in a tensor with the specified shape. | ||||
|  | ||||
| shape | ||||
| ^^^^^ | ||||
|  | ||||
| .. java:method:: public long[] shape() | ||||
|    :outertype: Tensor | ||||
|  | ||||
|    Returns the shape of this tensor. (The array is a fresh copy.) | ||||
| @ -1,114 +0,0 @@ | ||||
| .. java:import:: android.graphics Bitmap | ||||
|  | ||||
| .. java:import:: android.graphics ImageFormat | ||||
|  | ||||
| .. java:import:: android.media Image | ||||
|  | ||||
| .. java:import:: org.pytorch Tensor | ||||
|  | ||||
| .. java:import:: java.nio ByteBuffer | ||||
|  | ||||
| .. java:import:: java.nio FloatBuffer | ||||
|  | ||||
| .. java:import:: java.util Locale | ||||
|  | ||||
| TensorImageUtils | ||||
| ================ | ||||
|  | ||||
| .. java:package:: org.pytorch.torchvision | ||||
|    :noindex: | ||||
|  | ||||
| .. java:type:: public final class TensorImageUtils | ||||
|  | ||||
|    Contains utility functions for \ :java:ref:`org.pytorch.Tensor`\  creation from \ :java:ref:`android.graphics.Bitmap`\  or \ :java:ref:`android.media.Image`\  source. | ||||
|  | ||||
| Fields | ||||
| ------ | ||||
| TORCHVISION_NORM_MEAN_RGB | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:field:: public static float[] TORCHVISION_NORM_MEAN_RGB | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
| TORCHVISION_NORM_STD_RGB | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:field:: public static float[] TORCHVISION_NORM_STD_RGB | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
| Methods | ||||
| ------- | ||||
| bitmapToFloat32Tensor | ||||
| ^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor bitmapToFloat32Tensor(Bitmap bitmap, float[] normMeanRGB, float[] normStdRGB) | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
|    Creates a new \ :java:ref:`org.pytorch.Tensor`\  from the full \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std specified in the parameters. | ||||
|  | ||||
|    :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order | ||||
|    :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order | ||||
|  | ||||
| bitmapToFloat32Tensor | ||||
| ^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor bitmapToFloat32Tensor(Bitmap bitmap, int x, int y, int width, int height, float[] normMeanRGB, float[] normStdRGB) | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
|    Creates a new \ :java:ref:`org.pytorch.Tensor`\  from the specified area of \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std specified in the parameters. | ||||
|  | ||||
|    :param bitmap: \ :java:ref:`android.graphics.Bitmap`\  as a source for Tensor data | ||||
|    :param x: - x coordinate of top left corner of bitmap's area | ||||
|    :param y: - y coordinate of top left corner of bitmap's area | ||||
|    :param width: - width of bitmap's area | ||||
|    :param height: - height of bitmap's area | ||||
|    :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order | ||||
|    :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order | ||||
|  | ||||
| bitmapToFloatBuffer | ||||
| ^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static void bitmapToFloatBuffer(Bitmap bitmap, int x, int y, int width, int height, float[] normMeanRGB, float[] normStdRGB, FloatBuffer outBuffer, int outBufferOffset) | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
|    Writes tensor content from the specified \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std specified in the parameters, to the specified \ :java:ref:`java.nio.FloatBuffer`\  at the specified offset. | ||||
|  | ||||
|    :param bitmap: \ :java:ref:`android.graphics.Bitmap`\  as a source for Tensor data | ||||
|    :param x: - x coordinate of top left corner of bitmap's area | ||||
|    :param y: - y coordinate of top left corner of bitmap's area | ||||
|    :param width: - width of bitmap's area | ||||
|    :param height: - height of bitmap's area | ||||
|    :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order | ||||
|    :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order | ||||
|  | ||||
| imageYUV420CenterCropToFloat32Tensor | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static Tensor imageYUV420CenterCropToFloat32Tensor(Image image, int rotateCWDegrees, int tensorWidth, int tensorHeight, float[] normMeanRGB, float[] normStdRGB) | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
|    Creates new \ :java:ref:`org.pytorch.Tensor`\  from specified area of \ :java:ref:`android.media.Image`\ , doing optional rotation, scaling (nearest) and center cropping. | ||||
|  | ||||
|    :param image: \ :java:ref:`android.media.Image`\  as a source for Tensor data | ||||
|    :param rotateCWDegrees: Clockwise angle through which the input image needs to be rotated to be upright. Range of valid values: 0, 90, 180, 270 | ||||
|    :param tensorWidth: return tensor width, must be positive | ||||
|    :param tensorHeight: return tensor height, must be positive | ||||
|    :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order | ||||
|    :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order | ||||
|  | ||||
| imageYUV420CenterCropToFloatBuffer | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| .. java:method:: public static void imageYUV420CenterCropToFloatBuffer(Image image, int rotateCWDegrees, int tensorWidth, int tensorHeight, float[] normMeanRGB, float[] normStdRGB, FloatBuffer outBuffer, int outBufferOffset) | ||||
|    :outertype: TensorImageUtils | ||||
|  | ||||
|    Writes tensor content from specified \ :java:ref:`android.media.Image`\ , doing optional rotation, scaling (nearest) and center cropping to specified \ :java:ref:`java.nio.FloatBuffer`\  with specified offset. | ||||
|  | ||||
|    :param image: \ :java:ref:`android.media.Image`\  as a source for Tensor data | ||||
|    :param rotateCWDegrees: Clockwise angle through which the input image needs to be rotated to be upright. Range of valid values: 0, 90, 180, 270 | ||||
|    :param tensorWidth: return tensor width, must be positive | ||||
|    :param tensorHeight: return tensor height, must be positive | ||||
|    :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order | ||||
|    :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order | ||||
|    :param outBuffer: Output buffer, where tensor content will be written | ||||
|    :param outBufferOffset: Output buffer offset with which tensor content will be written | ||||
| @ -1,18 +0,0 @@ | ||||
| org.pytorch | ||||
| =========== | ||||
|  | ||||
| .. java:package:: org.pytorch | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 1 | ||||
|  | ||||
|    DType | ||||
|    IValue | ||||
|    Module | ||||
|    Tensor | ||||
|    Tensor-Tensor_float32 | ||||
|    Tensor-Tensor_float64 | ||||
|    Tensor-Tensor_int32 | ||||
|    Tensor-Tensor_int64 | ||||
|    Tensor-Tensor_int8 | ||||
|    Tensor-Tensor_uint8 | ||||
| @ -1,9 +0,0 @@ | ||||
| org.pytorch.torchvision | ||||
| ======================= | ||||
|  | ||||
| .. java:package:: org.pytorch.torchvision | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 1 | ||||
|  | ||||
|    TensorImageUtils | ||||
| @ -1,7 +0,0 @@ | ||||
| Javadoc | ||||
| ======= | ||||
|  | ||||
| .. toctree:: | ||||
|    :maxdepth: 2 | ||||
|  | ||||
|    org/pytorch/package-index | ||||
| @ -42,6 +42,27 @@ The corresponding implementation is chosen automatically based on the PyTorch bu | ||||
|  | ||||
|   Quantization-aware training (through :class:`~torch.quantization.FakeQuantize`) supports both CPU and CUDA. | ||||
|  | ||||
|  | ||||
| .. note:: | ||||
|  | ||||
|    When preparing a quantized model, it is necessary to ensure that the qconfig and the engine used for quantized computations match | ||||
|    the backend on which the model will be executed. Quantization currently supports two backends: fbgemm (for use on x86, | ||||
|    `<https://github.com/pytorch/FBGEMM>`_) and qnnpack (for use on ARM, via the QNNPACK library `<https://github.com/pytorch/QNNPACK>`_). | ||||
|    For example, if you are interested in quantizing a model to run on ARM, it is recommended to set the qconfig by calling: | ||||
|  | ||||
|    ``qconfig = torch.quantization.get_default_qconfig('qnnpack')`` | ||||
|  | ||||
|    for post training quantization and | ||||
|  | ||||
|    ``qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')`` | ||||
|  | ||||
|    for quantization aware training. | ||||
|  | ||||
|    In addition, the ``torch.backends.quantized.engine`` parameter should be set to match the backend. To use qnnpack for inference, | ||||
|    set the engine as follows: | ||||
|  | ||||
|    ``torch.backends.quantized.engine = 'qnnpack'`` | ||||
|  | ||||
| Quantized Tensors | ||||
| --------------------------------------- | ||||
|  | ||||
| @ -111,7 +132,7 @@ Operations that are available from the ``torch`` namespace or as methods on Tens | ||||
|  | ||||
| * :func:`~torch.quantize_per_tensor` - Convert float tensor to quantized tensor with per-tensor scale and zero point | ||||
| * :func:`~torch.quantize_per_channel` - Convert float tensor to quantized tensor with per-channel scale and zero point | ||||
| * View-based operations like :meth:`~torch.Tensor.view`, :meth:`~torch.Tensor.as_strided`, :meth:`~torch.Tensor.expand`, :meth:`~torch.Tensor.flatten`, :meth:`~torch.Tensor.slice`, python-style indexing, etc - work as on regular tensor (if quantization is not per-channel) | ||||
| * View-based operations like :meth:`~torch.Tensor.view`, :meth:`~torch.Tensor.as_strided`, :meth:`~torch.Tensor.expand`, :meth:`~torch.Tensor.flatten`, :meth:`~torch.Tensor.select`, python-style indexing, etc - work as on regular tensor (if quantization is not per-channel) | ||||
| * Comparators | ||||
|     * :meth:`~torch.Tensor.ne` — Not equal | ||||
|     * :meth:`~torch.Tensor.eq` — Equal | ||||
| @ -132,12 +153,24 @@ Operations that are available from the ``torch`` namespace or as methods on Tens | ||||
| * :meth:`~torch.Tensor.q_per_channel_scales` — Returns the scales of the per-channel quantized tensor | ||||
| * :meth:`~torch.Tensor.q_per_channel_zero_points` — Returns the zero points of the per-channel quantized tensor | ||||
| * :meth:`~torch.Tensor.q_per_channel_axis` — Returns the channel axis of the per-channel quantized tensor | ||||
| * :meth:`~torch.Tensor.relu` — Rectified linear unit (copy) | ||||
| * :meth:`~torch.Tensor.relu_` — Rectified linear unit (inplace) | ||||
| * :meth:`~torch.Tensor.resize_` — In-place resize | ||||
| * :meth:`~torch.Tensor.sort` — Sorts the tensor | ||||
| * :meth:`~torch.Tensor.topk` — Returns k largest values of a tensor | ||||
|  | ||||
| ``torch.nn.functional`` | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| Basic activations, pooling, and interpolation/upsampling functions are supported. | ||||
|  | ||||
| * :meth:`~torch.nn.functional.relu` — Rectified linear unit (copy) | ||||
| * :meth:`~torch.nn.functional.relu_` — Rectified linear unit (inplace) | ||||
| * :meth:`~torch.nn.functional.max_pool2d` - Maximum pooling  | ||||
| * :meth:`~torch.nn.functional.adaptive_avg_pool2d` - Adaptive average pooling | ||||
| * :meth:`~torch.nn.functional.avg_pool2d` - Average pooling | ||||
| * :meth:`~torch.nn.functional.interpolate` - Interpolation | ||||
| * :meth:`~torch.nn.functional.upsample` - Upsampling | ||||
| * :meth:`~torch.nn.functional.upsample_bilinear` - Bilinear Upsampling  | ||||
| * :meth:`~torch.nn.functional.upsample_nearest` - Upsampling Nearest | ||||
|  | ||||
| ``torch.nn.intrinsic`` | ||||
| ~~~~~~~~~~~~~~~~~~~~~~ | ||||
| @ -432,7 +465,7 @@ Debugging utilities | ||||
| .. autofunction:: get_observer_dict | ||||
| .. autoclass:: RecordingObserver | ||||
|  | ||||
| torch.nn.instrinsic | ||||
| torch.nn.intrinsic | ||||
| -------------------------------- | ||||
|  | ||||
| This module implements the combined (fused) modules conv + relu which can be then quantized. | ||||
| @ -546,6 +579,13 @@ Functional interface | ||||
| .. autofunction:: conv2d | ||||
| .. autofunction:: conv3d | ||||
| .. autofunction:: max_pool2d | ||||
| .. autofunction:: adaptive_avg_pool2d | ||||
| .. autofunction:: avg_pool2d | ||||
| .. autofunction:: interpolate | ||||
| .. autofunction:: upsample | ||||
| .. autofunction:: upsample_bilinear | ||||
| .. autofunction:: upsample_nearest | ||||
|  | ||||
|  | ||||
| .. automodule:: torch.nn.quantized | ||||
|  | ||||
|  | ||||
| @ -55,7 +55,7 @@ This library provides primitives allowing users to create and modify references | ||||
| .. autofunction:: rpc_async | ||||
| .. autofunction:: remote | ||||
| .. autofunction:: get_worker_info | ||||
| .. autofunction:: wait_all_workers | ||||
| .. autofunction:: shutdown | ||||
|  | ||||
| Distributed Autograd Framework | ||||
| ------------------------------ | ||||
|  | ||||
| @ -1,131 +0,0 @@ | ||||
| /** | ||||
|  * Copyright (c) 2016-present, Facebook, Inc. | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "batch_permutation_op.h" | ||||
| #ifdef CAFFE2_USE_MKLDNN | ||||
| #include <caffe2/ideep/operators/operator_fallback_ideep.h> | ||||
| #include <caffe2/ideep/utils/ideep_operator.h> | ||||
| #endif | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
| #ifdef CAFFE2_USE_MKLDNN | ||||
| REGISTER_IDEEP_OPERATOR( | ||||
|     BatchPermutation, | ||||
|     IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>); | ||||
| #endif | ||||
|  | ||||
| REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>); | ||||
| REGISTER_CPU_OPERATOR( | ||||
|     BatchPermutationGradient, | ||||
|     BatchPermutationGradientOp<float, CPUContext>); | ||||
|  | ||||
| OPERATOR_SCHEMA(BatchPermutation) | ||||
|     .NumInputs(2) | ||||
|     .NumOutputs(1) | ||||
|     .SetDoc(R"DOC( | ||||
| Permute the batch elements of the input tensor X according to the permutation | ||||
| specified in the input indices. | ||||
|  | ||||
| Warning: this op does not verify that indices is a valid permutation; gradient | ||||
| computation is only correct if indices is a permutation. | ||||
| )DOC") | ||||
|     .Input( | ||||
|         0, | ||||
|         "X", | ||||
|         "Tensor of at least 1D shape (N, D0, D1, ...).") | ||||
|     .Input( | ||||
|         1, | ||||
|         "indices", | ||||
|         "1D tensor of type int with shape (N, ) specifying a valid permutation " | ||||
|         "of the indices in [0, N - 1] (inclusive).") | ||||
|     .Output( | ||||
|         0, | ||||
|         "Y", | ||||
|         "Tensor with the same shape as X where the (D0, D1, ...) dimensional " | ||||
|         "batch elements of X are permuted according to the input indices."); | ||||
|  | ||||
| OPERATOR_SCHEMA(BatchPermutationGradient) | ||||
|     .NumInputs(2) | ||||
|     .NumOutputs(1) | ||||
|     .Input( | ||||
|         0, | ||||
|         "indices", | ||||
|         "See BatchPermutation.") | ||||
|     .Input( | ||||
|         1, | ||||
|         "dY", | ||||
|         "Gradient of forward output 0 (Y).") | ||||
|     .Output( | ||||
|         0, | ||||
|         "dX", | ||||
|         "Gradient of forward input 0 (X)."); | ||||
|  | ||||
| template <> | ||||
| bool BatchPermutationOp<float, CPUContext>::RunOnDevice() { | ||||
|   const auto& X = Input(0); | ||||
|   const auto& indices = Input(1); | ||||
|  | ||||
|   CAFFE_ENFORCE_EQ(indices.dim(), 1, "indices must be 1-d"); | ||||
|   CAFFE_ENFORCE_EQ( | ||||
|     X.dim32(0), indices.dim32(0), | ||||
|     "X.dim32(0) must be equal to indices.dim32(0)", | ||||
|     "(", | ||||
|     X.dim32(0), | ||||
|     " vs. ", | ||||
|     indices.dim32(0), | ||||
|     ")"); | ||||
|  | ||||
|   auto* Y = Output(0, X.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   const int N = X.dim32(0); | ||||
|   const int C = X.dim32(1); | ||||
|   const int H = X.dim32(2); | ||||
|   const int W = X.dim32(3); | ||||
|  | ||||
|   const float *src = X.template data<float>(); | ||||
|   float *dst = Y->template mutable_data<float>(); | ||||
|  | ||||
| #ifdef _OPENMP | ||||
| #if (_OPENMP >= 201307) | ||||
| #pragma omp parallel for simd | ||||
| #else | ||||
| #pragma omp parallel for | ||||
| #endif  | ||||
| #endif   | ||||
|   for (int i = 0; i < N; i++) { | ||||
|     int idx = indices.template data<int>()[i]; | ||||
|  | ||||
|     std::memcpy(dst + i * C * H * W, src + idx * C * H * W, sizeof(float) * C * H * W); | ||||
|   } | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| class GetBatchPermutationGradient : public GradientMakerBase { | ||||
|   using GradientMakerBase::GradientMakerBase; | ||||
|   vector<OperatorDef> GetGradientDefs() override { | ||||
|     return SingleGradientDef( | ||||
|         "BatchPermutationGradient", | ||||
|         "", | ||||
|         vector<string>{I(1), GO(0)}, | ||||
|         vector<string>{GI(0)}); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient); | ||||
|  | ||||
| } // namespace caffe2 | ||||
| @ -1,112 +0,0 @@ | ||||
| /** | ||||
|  * Copyright (c) 2016-present, Facebook, Inc. | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "modules/detectron/batch_permutation_op.h" | ||||
| #include "caffe2/core/context_gpu.h" | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
| namespace { | ||||
| template <bool forward> | ||||
| __global__ void BatchPermutationKernel( | ||||
|     int N, | ||||
|     int C, | ||||
|     int H, | ||||
|     int W, | ||||
|     const float* src, | ||||
|     const int* indices, | ||||
|     float* dst) { | ||||
|   CUDA_1D_KERNEL_LOOP(index, N * C * H * W) { | ||||
|     int w = index % W; | ||||
|     int h = (index / W) % H; | ||||
|     int c = (index / W / H) % C; | ||||
|     int n = (index / W / H / C); | ||||
|     int idx = indices[n]; | ||||
|     if (forward) { | ||||
|       dst[n * C * H * W + c * H * W + h * W + w] = | ||||
|           src[idx * C * H * W + c * H * W + h * W + w]; | ||||
|     } else { | ||||
|       dst[idx * C * H * W + c * H * W + h * W + w] = | ||||
|           src[n * C * H * W + c * H * W + h * W + w]; | ||||
|     } | ||||
|   } | ||||
| } | ||||
| } | ||||
|  | ||||
| template <> | ||||
| bool BatchPermutationOp<float, CUDAContext>::RunOnDevice() { | ||||
|   auto& X = Input(0); | ||||
|   auto& indices = Input(1); | ||||
|  | ||||
|  | ||||
|   CAFFE_ENFORCE(indices.ndim() == 1, "indices must be 1-d"); | ||||
|   CAFFE_ENFORCE( | ||||
|       X.dim32(0) == indices.dim32(0), | ||||
|       "X.dim32(0) must be equal to indices.dim32(0)", | ||||
|       "(", | ||||
|       X.dim32(0), | ||||
|       " vs. ", | ||||
|       indices.dim32(0), | ||||
|       ")"); | ||||
|  | ||||
|   auto* Y = Output(0, X.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   BatchPermutationKernel<true><<< | ||||
|       CAFFE_GET_BLOCKS(X.size()), | ||||
|       CAFFE_CUDA_NUM_THREADS, | ||||
|       0, | ||||
|       context_.cuda_stream()>>>( | ||||
|       X.dim32(0), | ||||
|       X.dim32(1), | ||||
|       X.dim32(2), | ||||
|       X.dim32(3), | ||||
|       X.data<float>(), | ||||
|       indices.data<int>(), | ||||
|       Y->mutable_data<float>()); | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| template <> | ||||
| bool BatchPermutationGradientOp<float, CUDAContext>::RunOnDevice() { | ||||
|   auto& indices = Input(0); | ||||
|   auto& dY = Input(1); | ||||
|  | ||||
|   auto* dX = Output(0, dY.sizes(), at::dtype<float>()); | ||||
|  | ||||
|   BatchPermutationKernel<false><<< | ||||
|       CAFFE_GET_BLOCKS(dY.size()), | ||||
|       CAFFE_CUDA_NUM_THREADS, | ||||
|       0, | ||||
|       context_.cuda_stream()>>>( | ||||
|       dY.dim32(0), | ||||
|       dY.dim32(1), | ||||
|       dY.dim32(2), | ||||
|       dY.dim32(3), | ||||
|       dY.data<float>(), | ||||
|       indices.data<int>(), | ||||
|       dX->mutable_data<float>()); | ||||
|  | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| REGISTER_CUDA_OPERATOR( | ||||
|     BatchPermutation, | ||||
|     BatchPermutationOp<float, CUDAContext>); | ||||
| REGISTER_CUDA_OPERATOR( | ||||
|     BatchPermutationGradient, | ||||
|     BatchPermutationGradientOp<float, CUDAContext>); | ||||
| } // namespace caffe2 | ||||
| @ -1,53 +0,0 @@ | ||||
| /** | ||||
|  * Copyright (c) 2016-present, Facebook, Inc. | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #ifndef BATCHPERMUTATION_OP_H_ | ||||
| #define BATCHPERMUTATION_OP_H_ | ||||
|  | ||||
| #include <cstring> | ||||
| #include "caffe2/core/context.h" | ||||
| #include "caffe2/core/logging.h" | ||||
| #include "caffe2/core/operator.h" | ||||
| #include "caffe2/utils/math.h" | ||||
|  | ||||
| namespace caffe2 { | ||||
|  | ||||
| template <typename T, class Context> | ||||
| class BatchPermutationOp final : public Operator<Context> { | ||||
|  public: | ||||
|   BatchPermutationOp(const OperatorDef& operator_def, Workspace* ws) | ||||
|       : Operator<Context>(operator_def, ws) {} | ||||
|   USE_OPERATOR_CONTEXT_FUNCTIONS; | ||||
|  | ||||
|   bool RunOnDevice() override; | ||||
| }; | ||||
|  | ||||
| template <typename T, class Context> | ||||
| class BatchPermutationGradientOp final : public Operator<Context> { | ||||
|  public: | ||||
|   BatchPermutationGradientOp(const OperatorDef& def, Workspace* ws) | ||||
|       : Operator<Context>(def, ws) {} | ||||
|   USE_OPERATOR_CONTEXT_FUNCTIONS; | ||||
|  | ||||
|   bool RunOnDevice() override { | ||||
|     // No CPU implementation for now | ||||
|     CAFFE_NOT_IMPLEMENTED; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } // namespace caffe2 | ||||
|  | ||||
| #endif // BATCHPERMUTATION_OP_H_ | ||||
| @ -17,17 +17,23 @@ from torch._C import parse_schema | ||||
| # | ||||
| # Whitelist entries can be removed after the date listed on them passes. | ||||
| white_list = [ | ||||
|     ('c10_experimental', datetime.date(2020, 1, 1)), | ||||
|     ('_batch_norm_impl_index', datetime.date(2019, 11, 15)), | ||||
|     ('_batch_norm_impl_index_backward', datetime.date(2019, 11, 15)), | ||||
|     ('cudnn_batch_norm', datetime.date(2019, 11, 15)), | ||||
|     ('cudnn_batch_norm_backward', datetime.date(2019, 11, 15)), | ||||
|     ('_nnpack_spatial_convolution', datetime.date(2019, 11, 12)), | ||||
|     ('_aten', datetime.date(2019, 12, 22)), | ||||
|     ('_prim::ListConstruct', datetime.date(2019, 11, 22)), | ||||
|     ('thnn_conv3d', datetime.date(9999, 1, 1)), | ||||
|     ('thnn_conv3d.out', datetime.date(9999, 1, 1)), | ||||
|     ('grad', datetime.date(2020, 1, 1)), | ||||
|     ("aten::append", datetime.date(9999, 1, 1)), | ||||
|     ("prim::AutogradAnyNonZero", datetime.date(9999, 1, 1)), | ||||
|     ("aten::grad", datetime.date(9999, 1, 1)), | ||||
|     ("_c10_experimental", datetime.date(9999, 1, 1)), | ||||
|     ("aten::thnn_conv3d", datetime.date(9999, 1, 1)), | ||||
|     ("aten::native_layer_norm_double_backward", datetime.date(9999, 1, 1)), | ||||
|     ("aten::cudnn_batch_norm", datetime.date(9999, 1, 1)), | ||||
|     ("aten::cudnn_batch_norm_backward", datetime.date(9999, 1, 1)), | ||||
|     ("aten::_batch_norm_impl_index_backward", datetime.date(9999, 1, 1)), | ||||
|     ("aten::empty_like", datetime.date(9999, 1, 1)), | ||||
|     ("aten::_batch_norm_impl_index", datetime.date(9999, 1, 1)), | ||||
|     ("aten::index_fill_", datetime.date(9999, 1, 1)), | ||||
|     ("aten::index_fill", datetime.date(9999, 1, 1)), | ||||
|     ("aten::log_softmax", datetime.date(9999, 1, 1)), | ||||
|     ("aten::softmax", datetime.date(9999, 1, 1)), | ||||
|     ("aten::thnn_conv3d_forward", datetime.date(9999, 1, 1)), | ||||
|     ("aten::thnn_conv3d_backward.output_mask", datetime.date(9999, 1, 1)), | ||||
| ] | ||||
|  | ||||
|  | ||||
| @ -43,6 +49,8 @@ def white_listed(schema, white_list): | ||||
|  | ||||
| def check_bc(new_schema_dict): | ||||
|     existing_schemas = torch._C._jit_get_all_schemas() | ||||
|     is_bc = True | ||||
|     broken_ops = [] | ||||
|     for existing_schema in existing_schemas: | ||||
|         if white_listed(existing_schema, white_list): | ||||
|             print("skipping schema: ", str(existing_schema)) | ||||
| @ -60,13 +68,17 @@ def check_bc(new_schema_dict): | ||||
|                   .format( | ||||
|                       str(existing_schema), | ||||
|                       "\n\t".join(str(s) for s in new_schemas))) | ||||
|             print('The PR is introducing backward incompatible changes to the ' | ||||
|                   'operator library. Please contact PyTorch team to confirm ' | ||||
|                   'whether this change is wanted or not.') | ||||
|             # TODO Print out more details about why candidates don't match. | ||||
|             return False | ||||
|     print('Found backward compatible schemas for all existing schemas') | ||||
|     return True | ||||
|             broken_ops.append(str(existing_schema)) | ||||
|             is_bc = False | ||||
|     if is_bc: | ||||
|         print('Found backward compatible schemas for all existing schemas') | ||||
|     else: | ||||
|         print('The PR is introducing backward incompatible changes to the ' | ||||
|               'operator library. Please contact PyTorch team to confirm ' | ||||
|               'whether this change is wanted or not. \n Broken ops: [\n{}]' | ||||
|               .format("\n".join(broken_ops))) | ||||
|     return is_bc | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|  | ||||
| @ -903,6 +903,15 @@ class TestCase(expecttest.TestCase): | ||||
|         # Don't put this in the try block; the AssertionError will catch it | ||||
|         self.fail(msg="Did not raise when expected to") | ||||
|  | ||||
|     def assertNotWarn(self, callable, msg=''): | ||||
|         r""" | ||||
|         Test if :attr:`callable` does not raise a warning. | ||||
|         """ | ||||
|         with self._reset_warning_registry(), warnings.catch_warnings(record=True) as ws: | ||||
|             warnings.simplefilter("always")  # allow any warning to be raised | ||||
|             callable() | ||||
|             self.assertTrue(len(ws) == 0, msg) | ||||
|  | ||||
|     def assertWarns(self, callable, msg=''): | ||||
|         r""" | ||||
|         Test if :attr:`callable` raises a warning. | ||||
|  | ||||
| @ -145,7 +145,7 @@ TEST_F(ModuleTest, RegisterParameterUndefinedTensor) { | ||||
|   { | ||||
|     TestModel model; | ||||
|     model.register_parameter("undefined_tensor", torch::Tensor(), /*requires_grad=*/false); | ||||
|     ASSERT_FALSE(model.named_parameters()["undefined_tensor"].defined()); | ||||
|     ASSERT_EQ(model.parameters().size(), 0); | ||||
|   } | ||||
|   { | ||||
|     std::stringstream buffer; | ||||
| @ -153,7 +153,7 @@ TEST_F(ModuleTest, RegisterParameterUndefinedTensor) { | ||||
|  | ||||
|     TestModel model; | ||||
|     model.register_parameter("undefined_tensor", torch::Tensor()); | ||||
|     ASSERT_FALSE(model.named_parameters()["undefined_tensor"].defined()); | ||||
|     ASSERT_EQ(model.parameters().size(), 0); | ||||
|  | ||||
|     ASSERT_EQ( | ||||
|       count_substr_occurrences( | ||||
| @ -221,6 +221,87 @@ TEST_F(ModuleTest, AsCastsModulesCorrectly) { | ||||
|   ASSERT_EQ(unit.as<AGIUnit>(), &unit); | ||||
| } | ||||
|  | ||||
| void test_DeviceOrDtypeConversionSkipsUndefinedTensor( | ||||
|   torch::Device to_device, torch::Dtype to_dtype) { | ||||
|   { | ||||
|     // Case 1: Undefined tensors as parameters | ||||
|     Linear module(LinearOptions(10, 20).bias(false)); | ||||
|     ASSERT_TRUE(module->weight.defined()); | ||||
|     ASSERT_FALSE(module->bias.defined()); | ||||
|  | ||||
|     module->to(to_device); | ||||
|     ASSERT_TRUE(module->weight.defined()); | ||||
|     ASSERT_EQ(module->weight.device().type(), to_device.type()); | ||||
|     ASSERT_FALSE(module->bias.defined()); | ||||
|  | ||||
|     module->to(to_dtype); | ||||
|     ASSERT_TRUE(module->weight.defined()); | ||||
|     ASSERT_EQ(module->weight.dtype(), to_dtype); | ||||
|     ASSERT_FALSE(module->bias.defined()); | ||||
|   } | ||||
|   { | ||||
|     // Case 2: Undefined tensors as buffers | ||||
|     BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(false).affine(true)); | ||||
|     ASSERT_TRUE(module->weight.defined()); | ||||
|     ASSERT_FALSE(module->running_mean.defined()); | ||||
|  | ||||
|     module->to(to_device); | ||||
|     ASSERT_TRUE(module->weight.defined()); | ||||
|     ASSERT_EQ(module->weight.device().type(), to_device.type()); | ||||
|     ASSERT_FALSE(module->running_mean.defined()); | ||||
|  | ||||
|     module->to(to_dtype); | ||||
|     ASSERT_TRUE(module->weight.defined()); | ||||
|     ASSERT_EQ(module->weight.dtype(), to_dtype); | ||||
|     ASSERT_FALSE(module->running_mean.defined()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| TEST_F(ModuleTest, DeviceOrDtypeConversionSkipsUndefinedTensor) { | ||||
|   test_DeviceOrDtypeConversionSkipsUndefinedTensor(torch::kCPU, torch::kDouble); | ||||
| } | ||||
|  | ||||
| TEST_F(ModuleTest, DeviceOrDtypeConversionSkipsUndefinedTensor_CUDA) { | ||||
|   test_DeviceOrDtypeConversionSkipsUndefinedTensor(torch::kCUDA, torch::kDouble); | ||||
| } | ||||
|  | ||||
| TEST_F(ModuleTest, ParametersAndBuffersAccessorSkipsUndefinedTensor) { | ||||
|   { | ||||
|     Linear module(LinearOptions(10, 20).bias(false)); | ||||
|  | ||||
|     auto params = module->parameters(); | ||||
|     ASSERT_EQ(params.size(), 1); | ||||
|     auto named_params = module->named_parameters(); | ||||
|     ASSERT_EQ(named_params.size(), 1); | ||||
|  | ||||
|     ASSERT_TRUE(pointer_equal(params[0], named_params["weight"])); | ||||
|     ASSERT_TRUE(pointer_equal(named_params["weight"], module->weight)); | ||||
|   } | ||||
|   { | ||||
|     BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(false).affine(false)); | ||||
|  | ||||
|     auto buffers = module->buffers(); | ||||
|     ASSERT_EQ(buffers.size(), 0); | ||||
|     auto named_buffers = module->named_buffers(); | ||||
|     ASSERT_EQ(named_buffers.size(), 0); | ||||
|   } | ||||
|   { | ||||
|     BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(true).affine(false)); | ||||
|  | ||||
|     auto buffers = module->buffers(); | ||||
|     ASSERT_EQ(buffers.size(), 3); | ||||
|     auto named_buffers = module->named_buffers(); | ||||
|     ASSERT_EQ(named_buffers.size(), 3); | ||||
|  | ||||
|     ASSERT_TRUE(pointer_equal(buffers[0], named_buffers["running_mean"])); | ||||
|     ASSERT_TRUE(pointer_equal(named_buffers["running_mean"], module->running_mean)); | ||||
|     ASSERT_TRUE(pointer_equal(buffers[1], named_buffers["running_var"])); | ||||
|     ASSERT_TRUE(pointer_equal(named_buffers["running_var"], module->running_var)); | ||||
|     ASSERT_TRUE(pointer_equal(buffers[2], named_buffers["num_batches_tracked"])); | ||||
|     ASSERT_TRUE(pointer_equal(named_buffers["num_batches_tracked"], module->num_batches_tracked)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| TEST_F(ModuleTest, Conversion_MultiCUDA) { | ||||
|   Linear module(128, 64); | ||||
|   for (auto& parameter : module->parameters()) { | ||||
|  | ||||
| @ -46,7 +46,7 @@ private: | ||||
| }; | ||||
|  | ||||
| inline bool pointer_equal(at::Tensor first, at::Tensor second) { | ||||
|   return first.data_ptr<float>() == second.data_ptr<float>(); | ||||
|   return first.data_ptr() == second.data_ptr(); | ||||
| } | ||||
|  | ||||
| inline int count_substr_occurrences(const std::string& str, const std::string& substr) { | ||||
|  | ||||
| @ -1360,7 +1360,7 @@ class DistAutogradTest(RpcAgentTestFixture): | ||||
|         # receive gradients from the node that received an error (and as a | ||||
|         # result it didn't execute the rest of the graph). | ||||
|         dist.barrier() | ||||
|         rpc.wait_all_workers() | ||||
|         rpc.shutdown() | ||||
|         sys.exit(0) | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -72,6 +72,11 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True): | ||||
|  | ||||
|     @wraps(old_test_method) | ||||
|     def new_test_method(self, *arg, **kwargs): | ||||
|         # Reset _ignore_rref_leak to False to make sure OwnerRRefs are properly | ||||
|         # deleted in tests. | ||||
|         import torch.distributed.rpc.api as api | ||||
|         api._ignore_rref_leak = False | ||||
|  | ||||
|         self.worker_id = self.rank | ||||
|  | ||||
|         if setup_rpc: | ||||
| @ -83,7 +88,6 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True): | ||||
|             rpc.init_rpc( | ||||
|                 name="worker%d" % self.rank, | ||||
|                 backend=self.rpc_backend, | ||||
|                 init_method=self.init_method, | ||||
|                 rank=self.rank, | ||||
|                 world_size=self.world_size, | ||||
|                 rpc_backend_options=self.rpc_backend_options, | ||||
| @ -123,7 +127,7 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True): | ||||
|             # since we need to shutdown the RPC agent. If we don't shutdown the | ||||
|             # RPC agent, tests would fail since RPC agent threads, locks and | ||||
|             # condition variables are not properly terminated. | ||||
|             rpc.wait_all_workers() | ||||
|             rpc.shutdown() | ||||
|  | ||||
|         return return_value | ||||
|  | ||||
| @ -134,6 +138,7 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True): | ||||
| TEST_CONFIG.rpc_backend_name = "PROCESS_GROUP" | ||||
| TEST_CONFIG.build_rpc_backend_options = lambda test_object: rpc.backend_registry.construct_rpc_backend_options( | ||||
|     test_object.rpc_backend, | ||||
|     init_method=test_object.init_method, | ||||
|     # Use enough 'num_send_recv_threads' until we fix https://github.com/pytorch/pytorch/issues/26359 | ||||
|     num_send_recv_threads=16, | ||||
| ) | ||||
|  | ||||
| @ -6,7 +6,7 @@ import hypothesis | ||||
| from hypothesis import assume | ||||
| from hypothesis import strategies as st | ||||
| from hypothesis.extra import numpy as stnp | ||||
| from hypothesis.searchstrategy import SearchStrategy | ||||
| from hypothesis.strategies import SearchStrategy | ||||
|  | ||||
| from common_quantized import _calculate_dynamic_qparams, _calculate_dynamic_per_channel_qparams | ||||
|  | ||||
| @ -304,10 +304,11 @@ def tensor_conv( | ||||
|  | ||||
|     return X, W, b, groups | ||||
|  | ||||
| # Disable deadline testing if this version of hypthesis supports it, otherwise | ||||
| # just return the original function | ||||
| def no_deadline(fn): | ||||
|     try: | ||||
|         return hypothesis.settings(deadline=None)(fn) | ||||
|     except hypothesis.errors.InvalidArgument: | ||||
|         return fn | ||||
| from hypothesis import settings | ||||
| settings.register_profile("no_deadline", deadline=None) | ||||
| settings.load_profile("no_deadline") | ||||
|  | ||||
| # This is really just to get flake8 to not complain when this file | ||||
| # is imported purely for the side-effectful stuff above | ||||
| def assert_deadline_disabled(): | ||||
|     assert settings().deadline is None | ||||
|  | ||||
| @ -183,31 +183,6 @@ class TestONNXRuntime(unittest.TestCase): | ||||
|         x = torch.randn(2, 3, 224, 224, requires_grad=True) | ||||
|         self.run_test(model, (x,), rtol=1e-3, atol=1e-5) | ||||
|  | ||||
|     def test_googlenet_quantization(self): | ||||
|         model = torchvision.models.quantization.googlenet(pretrained=True) | ||||
|         x = torch.randn(2, 3, 224, 224, requires_grad=True) | ||||
|         self.run_test(model, (x,), rtol=1e-3, atol=1e-5) | ||||
|  | ||||
|     def test_inception_quantization(self): | ||||
|         model = torchvision.models.quantization.inception_v3(pretrained=True) | ||||
|         x = torch.randn(2, 3, 224, 224, requires_grad=True) | ||||
|         self.run_test(model, (x,), rtol=1e-3, atol=1e-5) | ||||
|  | ||||
|     def test_mobilenet_quantization(self): | ||||
|         model = torchvision.models.quantization.mobilenet_v2(pretrained=True) | ||||
|         x = torch.randn(2, 3, 224, 224, requires_grad=True) | ||||
|         self.run_test(model, (x,), rtol=1e-3, atol=1e-5) | ||||
|  | ||||
|     def test_resnet_quantization(self): | ||||
|         model = torchvision.models.quantization.resnet50(pretrained=True) | ||||
|         x = torch.randn(2, 3, 224, 224, requires_grad=True) | ||||
|         self.run_test(model, (x,)) | ||||
|  | ||||
|     def test_shufflenet_quantization(self): | ||||
|         model = torchvision.models.quantization.shufflenet_v2_x1_0(pretrained=True) | ||||
|         x = torch.randn(2, 3, 224, 224, requires_grad=True) | ||||
|         self.run_test(model, (x,), rtol=1e-3, atol=1e-5) | ||||
|  | ||||
|     def test_r3d_18_video(self): | ||||
|         model = torchvision.models.video.r3d_18(pretrained=True) | ||||
|         x = torch.randn(1, 3, 4, 112, 112, requires_grad=True) | ||||
| @ -238,6 +213,55 @@ class TestONNXRuntime(unittest.TestCase): | ||||
|         # Only support CPU version, since tracer is not working in GPU RNN. | ||||
|         self.run_test(model, (x, model.hidden)) | ||||
|  | ||||
|     def get_image_from_url(self, url): | ||||
|         import sys | ||||
|         import os | ||||
|         if sys.version_info < (3,): | ||||
|             from urlparse import urlsplit | ||||
|             import urllib2 | ||||
|             request = urllib2 | ||||
|         else: | ||||
|             from urllib.parse import urlsplit | ||||
|             from urllib import request | ||||
|         from PIL import Image | ||||
|         from torchvision import transforms | ||||
|         from torch._utils_internal import get_writable_path | ||||
|  | ||||
|         filename = os.path.basename(urlsplit(url)[2]) | ||||
|         data_dir = get_writable_path(os.path.join(os.path.dirname(__file__))) | ||||
|         path = os.path.join(data_dir, filename) | ||||
|         data = request.urlopen(url, timeout=15).read() | ||||
|         with open(path, 'wb') as f: | ||||
|             f.write(data) | ||||
|         image = Image.open(path).convert("RGB") | ||||
|         image = image.resize((300, 200), Image.BILINEAR) | ||||
|         to_tensor = transforms.ToTensor() | ||||
|         return to_tensor(image) | ||||
|  | ||||
|     def get_test_images(self): | ||||
|         image_url = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg" | ||||
|         image = self.get_image_from_url(url=image_url) | ||||
|         images = [image] | ||||
|         return images | ||||
|  | ||||
|     @skipIfUnsupportedMinOpsetVersion(11) | ||||
|     def test_keypoint_rcnn(self): | ||||
|         class KeyPointRCNN(torch.nn.Module): | ||||
|             def __init__(self): | ||||
|                 super(KeyPointRCNN, self).__init__() | ||||
|                 self.model = torchvision.models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True, | ||||
|                                                                                                   min_size=200, | ||||
|                                                                                                   max_size=300) | ||||
|  | ||||
|             def forward(self, images): | ||||
|                 output = self.model(images) | ||||
|                 # TODO: keypoints_scores requires the Argmax op that was updated in ONNX. | ||||
|                 #       For now we test every output of KeypointRCNN except keypoints_scores. | ||||
|                 #       Enable it once Argmax is updated in ONNX Runtime. | ||||
|                 return output[0]['boxes'], output[0]['labels'], output[0]['scores'], output[0]['keypoints'] | ||||
|         images = self.get_test_images() | ||||
|         self.run_test(KeyPointRCNN(), (images,), rtol=1e-3, atol=1e-5) | ||||
|  | ||||
|     def test_word_language_model_RNN_TANH(self): | ||||
|         self.run_word_language_model("RNN_TANH") | ||||
|  | ||||
|  | ||||
							
								
								
									
										148
									
								
								test/rpc_test.py
									
									
									
									
									
								
							
							
						
						
									
										148
									
								
								test/rpc_test.py
									
									
									
									
									
								
							| @ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera | ||||
| import concurrent.futures | ||||
| from datetime import timedelta | ||||
| import sys | ||||
| import time | ||||
| import unittest | ||||
| from collections import namedtuple | ||||
| from unittest import mock | ||||
| @ -18,6 +19,21 @@ from torch.distributed.rpc.api import _use_rpc_pickler | ||||
| from torch.distributed.rpc.internal import PythonUDF, _internal_rpc_pickler | ||||
| from rpc_agent_test_fixture import RpcAgentTestFixture | ||||
|  | ||||
| rpc_done = [False, False, False, False] | ||||
|  | ||||
| # TODO: dedupe this with the code in dist_autograd_test.py. | ||||
| # Invoked over RPC on dst_rank = (self.rank + rank_distance) % self.world_size | ||||
| # to flag that the caller is done; only a boolean is set, no context_id is sent. | ||||
| # We don't need a lock here since the GIL is held while executing remote | ||||
| # python UDFs, so access is serialized across several workers. | ||||
| def _set_rpc_done(rank_distance): | ||||
|     global rpc_done | ||||
|     rpc_done[rank_distance] = True | ||||
|  | ||||
| def _check_rpc_done(rank_distance): | ||||
|     while not rpc_done[rank_distance]: | ||||
|         # yield control to other threads | ||||
|         time.sleep(0) | ||||
|  | ||||
| def requires_process_group_agent(message=""): | ||||
|     def decorator(old_func): | ||||
| @ -127,7 +143,6 @@ def my_tensor_function(a, b): | ||||
|     return a + b | ||||
|  | ||||
| def my_sleep_func(seconds=1): | ||||
|     import time | ||||
|     time.sleep(seconds) | ||||
|  | ||||
|  | ||||
| @ -306,7 +321,6 @@ class RpcTest(RpcAgentTestFixture): | ||||
|         rpc.init_rpc( | ||||
|             name="worker1", | ||||
|             backend=backend, | ||||
|             init_method=self.init_method, | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options, | ||||
| @ -327,14 +341,13 @@ class RpcTest(RpcAgentTestFixture): | ||||
|                 world_size=self.world_size, | ||||
|                 rpc_backend_options=self.rpc_backend_options, | ||||
|             ) | ||||
|         rpc.wait_all_workers() | ||||
|         rpc.shutdown() | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     def test_reinit(self): | ||||
|         rpc.init_rpc( | ||||
|             name="worker{}".format(self.rank), | ||||
|             backend=self.rpc_backend, | ||||
|             init_method=self.init_method, | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options, | ||||
| @ -357,13 +370,13 @@ class RpcTest(RpcAgentTestFixture): | ||||
|             rpc.init_rpc( | ||||
|                 name="worker{}".format(self.rank), | ||||
|                 backend=self.rpc_backend, | ||||
|                 init_method=self.init_method, | ||||
|                 rank=self.rank, | ||||
|                 world_size=self.world_size, | ||||
|                 rpc_backend_options=self.rpc_backend_options, | ||||
|             ) | ||||
|         rpc.wait_all_workers() | ||||
|         rpc.shutdown() | ||||
|  | ||||
|     @unittest.skip("test_invalid_names is flaky, see https://github.com/pytorch/pytorch/issues/25912") | ||||
|     @dist_init(setup_rpc=False) | ||||
|     def test_invalid_names(self): | ||||
|         with self.assertRaisesRegex(RuntimeError, "Worker name must match"): | ||||
| @ -430,8 +443,8 @@ class RpcTest(RpcAgentTestFixture): | ||||
|  | ||||
|         from torch.distributed.rpc.api import _agent | ||||
|         self.assertEqual(_agent, None) | ||||
|         # wait_all_workers() should not do anything as _agent is None | ||||
|         rpc.wait_all_workers() | ||||
|         # shutdown() should not do anything as _agent is None | ||||
|         rpc.shutdown() | ||||
|         # We need this barrier here because although init_process_group is | ||||
|         # blocking, it does not guarantee that all ranks are done with | ||||
|         # initialization after the call. We did run into issues with it where | ||||
| @ -508,12 +521,11 @@ class RpcTest(RpcAgentTestFixture): | ||||
|             self.assertEqual(ret, torch.ones(n, n) * 2) | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     def test_wait_all_workers(self): | ||||
|     def test_shutdown(self): | ||||
|         # Initialize RPC. | ||||
|         rpc.init_rpc( | ||||
|             name="worker%d" % self.rank, | ||||
|             backend=self.rpc_backend, | ||||
|             init_method=self.init_method, | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options, | ||||
| @ -527,7 +539,7 @@ class RpcTest(RpcAgentTestFixture): | ||||
|             args=(torch.ones(n, n), torch.ones(n, n)), | ||||
|         ) | ||||
|         self.assertEqual(ret, torch.ones(n, n) * 2) | ||||
|         rpc.wait_all_workers() | ||||
|         rpc.shutdown() | ||||
|  | ||||
|         with self.assertRaisesRegex(RuntimeError, "^RPC has not been initialized"): | ||||
|             rpc.rpc_sync( | ||||
| @ -536,8 +548,8 @@ class RpcTest(RpcAgentTestFixture): | ||||
|                 args=(torch.ones(n, n), torch.ones(n, n)), | ||||
|             ) | ||||
|  | ||||
|         # it's safe to call wait_all_workers() multiple times | ||||
|         rpc.wait_all_workers() | ||||
|         # it's safe to call shutdown() multiple times | ||||
|         rpc.shutdown() | ||||
|  | ||||
|     @dist_init | ||||
|     def test_expected_src(self): | ||||
| @ -701,8 +713,6 @@ class RpcTest(RpcAgentTestFixture): | ||||
|         self.assertEqual(ret, torch.ones(2, 2) + 1) | ||||
|  | ||||
|     def _stress_test_rpc(self, f, repeat=1000, args=()): | ||||
|         import time | ||||
|  | ||||
|         n = self.rank + 1 | ||||
|         dst_rank = n % self.world_size | ||||
|         futs = [] | ||||
| @ -1090,6 +1100,111 @@ class RpcTest(RpcAgentTestFixture): | ||||
|  | ||||
|         self.assertEqual(result, sum(vals)) | ||||
|  | ||||
|     def _test_rref_leak(self, ignore_leak=False): | ||||
|         rpc.init_rpc( | ||||
|             name="worker{}".format(self.rank), | ||||
|             backend=self.rpc_backend, | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options, | ||||
|         ) | ||||
|  | ||||
|         # This is for the below `dist.barrier`. | ||||
|         # For `RpcAgent` other than `ProcessGroupAgent`, | ||||
|         # no `_default_pg` is initialized. | ||||
|         if not dist.is_initialized(): | ||||
|             dist.init_process_group( | ||||
|                 backend="gloo", | ||||
|                 init_method=self.init_method, | ||||
|                 rank=self.rank, | ||||
|                 world_size=self.world_size, | ||||
|             ) | ||||
|         # Wait for all init to complete. | ||||
|         dist.barrier() | ||||
|  | ||||
|         rref = rpc.remote( | ||||
|             "worker{}".format((self.rank + 1) % self.world_size), | ||||
|             torch.add, | ||||
|             args=(torch.ones(2, 2), 1) | ||||
|         ) | ||||
|  | ||||
|         if ignore_leak: | ||||
|             import torch.distributed.rpc.api as api | ||||
|             api._ignore_rref_leak = True | ||||
|  | ||||
|         rpc.shutdown() | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     def test_rref_leak(self): | ||||
|         with self.assertRaisesRegex(RuntimeError, "Leaking RRef"): | ||||
|             self._test_rref_leak() | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     def test_ignore_rref_leak(self): | ||||
|         self._test_rref_leak(ignore_leak=True) | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip") | ||||
|     def test_local_shutdown(self): | ||||
|         # test that we can start RPC and then immediately locally shutdown | ||||
|         # without sending any messages. | ||||
|         rpc.init_rpc( | ||||
|             name="worker%d" % self.rank, | ||||
|             backend=rpc.backend_registry.BackendType[ | ||||
|                 dist_utils.TEST_CONFIG.rpc_backend_name | ||||
|             ], | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options, | ||||
|         ) | ||||
|         # pass in graceful=False to ensure that we don't wait for other workers. | ||||
|         rpc.shutdown(graceful=False) | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip") | ||||
|     def test_local_shutdown_with_rpc(self): | ||||
|         # test that we can start RPC, send RPCs, and then run local shutdown. | ||||
|         rpc.init_rpc( | ||||
|             name="worker%d" % self.rank, | ||||
|             backend=rpc.backend_registry.BackendType[ | ||||
|                 dist_utils.TEST_CONFIG.rpc_backend_name | ||||
|             ], | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options, | ||||
|         ) | ||||
|         n = self.rank + 1 | ||||
|         dst_rank = n % self.world_size | ||||
|         ret = rpc.rpc_sync( | ||||
|             "worker{}".format(dst_rank), | ||||
|             torch.add, | ||||
|             args=(torch.ones(n, n), torch.ones(n, n)), | ||||
|         ) | ||||
|         # wait for RPCs to be done, so that some workers don't try to shut down | ||||
|         # too early. | ||||
|         rpc.rpc_sync("worker{}".format(dst_rank), _set_rpc_done, args=(1,)) | ||||
|         _check_rpc_done(1) | ||||
|         # pass in graceful=False to ensure that we don't wait for other workers. | ||||
|         rpc.shutdown(graceful=False) | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip") | ||||
|     def test_wait_all_workers_and_shutdown(self): | ||||
|         # This test ensures that both rpc._wait_all_workers() and rpc.shutdown() can be | ||||
|         # called without errors being raised due to attempting to shut down | ||||
|         # multiple times. | ||||
|         rpc.init_rpc( | ||||
|             name="worker%d" % self.rank, | ||||
|             backend=rpc.backend_registry.BackendType[dist_utils.TEST_CONFIG.rpc_backend_name], | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=self.rpc_backend_options | ||||
|         ) | ||||
|         from torch.distributed.rpc.api import _wait_all_workers | ||||
|         # intentional call to internal _wait_all_workers. | ||||
|         _wait_all_workers() | ||||
|         rpc.shutdown() | ||||
|  | ||||
|     @dist_init(setup_rpc=False) | ||||
|     def test_get_rpc_timeout(self): | ||||
|         timeout = timedelta(seconds=1) | ||||
| @ -1102,14 +1217,13 @@ class RpcTest(RpcAgentTestFixture): | ||||
|         rpc.init_rpc( | ||||
|             name="worker{}".format(self.rank), | ||||
|             backend=self.rpc_backend, | ||||
|             init_method=self.init_method, | ||||
|             rank=self.rank, | ||||
|             world_size=self.world_size, | ||||
|             rpc_backend_options=rpc_backend_options, | ||||
|         ) | ||||
|         set_timeout = rpc.get_rpc_timeout() | ||||
|         self.assertEqual(timeout, set_timeout) | ||||
|         rpc.wait_all_workers() | ||||
|         rpc.shutdown() | ||||
|  | ||||
|     @dist_init | ||||
|     @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip") | ||||
|  | ||||
| @ -422,6 +422,9 @@ class WorkerSpecificIterableDataset(IterableDataset): | ||||
|         assert worker_info is not None | ||||
|         return iter(range(self.sizes_for_all_workers[worker_info.id])) | ||||
|  | ||||
|     def __len__(self): | ||||
|         return sum(self.sizes_for_all_workers) | ||||
|  | ||||
|  | ||||
| # Inspired by https://stackoverflow.com/a/26703365 | ||||
| # If all workers will call `sync_once`, they will be blocked until all workers | ||||
| @ -961,8 +964,8 @@ class TestDataLoader(TestCase): | ||||
|             # non-batched should not convert ints into tensors | ||||
|             self.assertIsInstance(d, torch._six.int_classes) | ||||
|             self.assertEqual(d, i) | ||||
|         with self.assertRaisesRegex(TypeError, "Cannot determine the DataLoader length of a IterableDataset"): | ||||
|             len(dataloader)  # DataLoader with iterable-style dataset should error in __len__ | ||||
|         # DataLoader should match len of the iterable-style dataset (if implemented) | ||||
|         self.assertEqual(len(dataloader), len(dataset)) | ||||
|  | ||||
|         # [no auto-batching] multiprocessing loading | ||||
|         num_workers = 3 | ||||
| @ -978,8 +981,26 @@ class TestDataLoader(TestCase): | ||||
|             # non-batched should not convert ints into tensors | ||||
|             self.assertIsInstance(a, torch._six.int_classes) | ||||
|             self.assertEqual(a, b) | ||||
|         with self.assertRaisesRegex(TypeError, "Cannot determine the DataLoader length of a IterableDataset"): | ||||
|             len(dataloader)  # DataLoader with iterable-style dataset should error in __len__ | ||||
|         # DataLoader should match len of the iterable-style dataset (if implemented) | ||||
|         self.assertEqual(len(dataloader), len(dataset)) | ||||
|         # When loading more than len(dataset) data, after accessing len(dataloader), | ||||
|         # we should get a warning. See NOTE [ IterableDataset and __len__ ]. | ||||
|         dataset = CountingIterableDataset(20) | ||||
|         dataloader = DataLoader(dataset, num_workers=num_workers, | ||||
|                                 worker_init_fn=set_faulthander_if_available) | ||||
|         it = iter(dataloader) | ||||
|         for _ in range(40): | ||||
|             self.assertNotWarn(lambda: next(it), "Should not warn before accessing len(dataloader)") | ||||
|         self.assertEqual(len(dataloader), len(dataset)) | ||||
|         self.assertEqual(len(dataloader), 20) | ||||
|         it = iter(dataloader) | ||||
|         for _ in range(20): | ||||
|             self.assertNotWarn(lambda: next(it), "Should not warn before exceeding length") | ||||
|         for _ in range(3): | ||||
|             self.assertWarnsRegex( | ||||
|                 lambda: next(it), | ||||
|                 r"but [0-9]+ samples have been fetched\. For multiprocessing data-loading, this", | ||||
|                 "Should always warn after exceeding length") | ||||
|  | ||||
|         # [no auto-batching] test that workers exit gracefully | ||||
|         workers = dataloader_iter._workers | ||||
|  | ||||
| @ -5,7 +5,7 @@ import numpy as np | ||||
| from hypothesis import given | ||||
| from hypothesis import strategies as st | ||||
| import hypothesis_utils as hu | ||||
| from hypothesis_utils import no_deadline | ||||
| hu.assert_deadline_disabled() | ||||
| from common_utils import run_tests, TestCase | ||||
| from torch.quantization import FakeQuantize | ||||
| from torch.quantization import default_observer, default_per_channel_weight_observer | ||||
| @ -64,10 +64,8 @@ NP_RANDOM_SEED = 19 | ||||
| tolerance = 1e-6 | ||||
|  | ||||
| class TestFakeQuantizePerTensor(TestCase): | ||||
|     # NOTE: Tests in this class are decorated with no_deadline | ||||
|     # to prevent spurious failures due to cuda runtime initialization. | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.tensor(shapes=hu.array_shapes(1, 5,), | ||||
|                        qparams=hu.qparams(dtypes=torch.quint8))) | ||||
| @ -85,7 +83,7 @@ class TestFakeQuantizePerTensor(TestCase): | ||||
|             X, scale, zero_point, quant_min, quant_max) | ||||
|         np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance) | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.tensor(shapes=hu.array_shapes(1, 5,), | ||||
|                        qparams=hu.qparams(dtypes=torch.quint8))) | ||||
| @ -108,7 +106,8 @@ class TestFakeQuantizePerTensor(TestCase): | ||||
|         Y_prime.backward(dout) | ||||
|         np.testing.assert_allclose(dX.cpu(), X.grad.cpu().detach().numpy(), rtol=tolerance, atol=tolerance) | ||||
|  | ||||
|     @no_deadline | ||||
|     # https://github.com/pytorch/pytorch/issues/30604 | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.tensor(shapes=hu.array_shapes(1, 5,), | ||||
|                        qparams=hu.qparams(dtypes=torch.quint8))) | ||||
| @ -127,7 +126,7 @@ class TestFakeQuantizePerTensor(TestCase): | ||||
|             X, scale, zero_point, quant_min, quant_max) | ||||
|         np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance) | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.tensor(shapes=hu.array_shapes(1, 5,), | ||||
|                        qparams=hu.qparams(dtypes=[torch.quint8])), | ||||
| @ -206,10 +205,8 @@ class TestFakeQuantizePerTensor(TestCase): | ||||
|  | ||||
|  | ||||
| class TestFakeQuantizePerChannel(TestCase): | ||||
|     # NOTE: Tests in this class are decorated with no_deadline | ||||
|     # to prevent spurious failures due to cuda runtime initialization. | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,), | ||||
|            qparams=hu.qparams(dtypes=torch.quint8))) | ||||
| @ -229,7 +226,7 @@ class TestFakeQuantizePerChannel(TestCase): | ||||
|             X, scale, zero_point, axis, quant_min, quant_max) | ||||
|         np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance) | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,), | ||||
|            qparams=hu.qparams(dtypes=torch.quint8))) | ||||
| @ -253,11 +250,10 @@ class TestFakeQuantizePerChannel(TestCase): | ||||
|         Y_prime.backward(dout) | ||||
|         np.testing.assert_allclose(dX.cpu().detach().numpy(), X.grad.cpu().detach().numpy(), rtol=tolerance, atol=tolerance) | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,), | ||||
|            qparams=hu.qparams(dtypes=torch.quint8))) | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     def test_numerical_consistency_per_channel(self, device, X): | ||||
|         r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op | ||||
|         """ | ||||
| @ -275,7 +271,7 @@ class TestFakeQuantizePerChannel(TestCase): | ||||
|             X, scale, zero_point, axis, quant_min, quant_max) | ||||
|         np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance) | ||||
|  | ||||
|     @no_deadline | ||||
|     @unittest.skip("temporarily disable the test") | ||||
|     @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']), | ||||
|            X=hu.per_channel_tensor(shapes=hu.array_shapes(2, 5,), | ||||
|            qparams=hu.qparams(dtypes=torch.qint8))) | ||||
|  | ||||
| @ -3564,6 +3564,38 @@ graph(%Ra, %Rb): | ||||
|                 self.assertTrue(type(block.paramNode()) == torch._C.Node) | ||||
|         self.assertTrue(tested_blocks) | ||||
|  | ||||
|     def test_export_opnames(self): | ||||
|         # Builds a ScriptModule (Bar) that wraps another ScriptModule (Foo) and | ||||
|         # checks the operator names reported by torch.jit.export_opnames. | ||||
|         class Foo(torch.jit.ScriptModule): | ||||
|             def __init__(self): | ||||
|                 super(Foo, self).__init__() | ||||
|  | ||||
|             def one(self, x, y): | ||||
|                 # type: (Tensor, Tensor) -> Tensor | ||||
|                 return x + y | ||||
|  | ||||
|             def two(self, x): | ||||
|                 # type: (Tensor) -> Tensor | ||||
|                 return 2 * x | ||||
|  | ||||
|             @torch.jit.script_method | ||||
|             def forward(self, x): | ||||
|                 # type: (Tensor) -> Tensor | ||||
|                 return self.one(self.two(x), x) | ||||
|  | ||||
|         # Bar only delegates to its Foo submodule's forward(). | ||||
|         class Bar(torch.jit.ScriptModule): | ||||
|             def __init__(self): | ||||
|                 super(Bar, self).__init__() | ||||
|                 self.sub = Foo() | ||||
|  | ||||
|             def forward(self, x): | ||||
|                 # type: (Tensor) -> Tensor | ||||
|                 return self.sub.forward(x) | ||||
|  | ||||
|         bar = Bar() | ||||
|         ops = torch.jit.export_opnames(bar) | ||||
|         # Presumably add.Tensor comes from one() and mul.Scalar from two(); | ||||
|         # the comparison is against the exact list, in this order. | ||||
|         expected = ['aten::add.Tensor', 'aten::mul.Scalar', 'prim::Constant'] | ||||
|         self.assertEqual(ops, expected) | ||||
|  | ||||
|     def test_pytorch_jit_env_off(self): | ||||
|         import subprocess | ||||
|         env = os.environ.copy() | ||||
| @ -7037,6 +7069,15 @@ a") | ||||
|         self.checkScript(func1, (), optimize=True) | ||||
|         self.checkScript(func2, (), optimize=True) | ||||
|  | ||||
|     # FIXME: get rid of this once we have actual ops using optional floats | ||||
|     def test_optional_float(self): | ||||
|         # Scripts a thin wrapper around torch._test_optional_float and checks | ||||
|         # the result shape for both a missing (None) and a concrete scale. | ||||
|         def _test_optional_float(x, scale): | ||||
|             # type: (Tensor, Optional[float]) -> torch.Tensor | ||||
|             return torch._test_optional_float(x, scale=scale) | ||||
|  | ||||
|         # scale=None is expected to yield a result of shape [0]. | ||||
|         self.assertEqual([0], torch.jit.script(_test_optional_float)(torch.randn(()), None).shape) | ||||
|         # A concrete scale is expected to yield a 0-dim (scalar) result. | ||||
|         self.assertEqual((), torch.jit.script(_test_optional_float)(torch.randn(()), 2.5).shape) | ||||
|  | ||||
|     def _test_tensor_number_math(self, device='cpu'): | ||||
|         template = dedent(''' | ||||
|         def func(t): | ||||
|  | ||||
| @ -1038,6 +1038,11 @@ class TestNamedTensor(TestCase): | ||||
|         self.assertEqual(out.names, ('N', 'C', 'H', 'W', 'K')) | ||||
|         self.assertEqual(out.shape, (7, 2, 3, 5, 11)) | ||||
|  | ||||
|         # takes negative positional dim | ||||
|         out = tensor.unflatten(-2, (('C', 2), ('H', 3), ('W', 5))) | ||||
|         self.assertEqual(out.names, ('N', 'C', 'H', 'W', 'K')) | ||||
|         self.assertEqual(out.shape, (7, 2, 3, 5, 11)) | ||||
|  | ||||
|         with self.assertRaisesRegex(RuntimeError, "don't multiply up to"): | ||||
|             tensor.unflatten('D', (('H', 3), ('W', 5))) | ||||
|  | ||||
|  | ||||
| @ -6232,6 +6232,38 @@ class TestNN(NNTestCase): | ||||
|         inp = torch.randn(4, 5, device='cuda', requires_grad=True) | ||||
|         gradgradcheck(F.pdist, (inp,)) | ||||
|  | ||||
|     def test_cosine_embedding_loss_with_diff_type(self): | ||||
|         for device in device_(): | ||||
|             input1 = torch.tensor([[2, 3, 4], [6, 2, 4]], dtype=torch.double, device=device) | ||||
|             input2 = torch.tensor([[2, 3, 5], [3, 2, 1]], dtype=torch.double, device=device) | ||||
|             target = torch.tensor([1, -1], dtype=torch.int, device=device) | ||||
|             expected = torch.nn.functional.cosine_embedding_loss(input1, input2, target) | ||||
|             for dt1 in torch.testing.get_all_math_dtypes(device): | ||||
|                 for dt2 in torch.testing.get_all_math_dtypes(device): | ||||
|                     for dt3 in torch.testing.get_all_math_dtypes(device): | ||||
|                         # dt3 is used as dtype for target = [1, -1], so let's skip unsigned type | ||||
|                         if dt3 == torch.uint8: | ||||
|                             continue | ||||
|                         input1 = input1.to(dt1) | ||||
|                         input2 = input2.to(dt2) | ||||
|                         target = target.to(dt3) | ||||
|                         result = torch.nn.functional.cosine_embedding_loss(input1, input2, target) | ||||
|                         self.assertEqual(result.item(), expected.item(), 0.001) | ||||
|  | ||||
|     def test_kl_div_with_diff_type(self): | ||||
|         for device in device_(): | ||||
|             input = torch.tensor([[2, 3, 5], [3, 2, 1]], dtype=torch.double, device=device) | ||||
|             target = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.double, device=device) | ||||
|             expected = torch.nn.functional.kl_div(input, target) | ||||
|             for input_dtype in torch.testing.get_all_math_dtypes(device): | ||||
|                 for target_dtype in [torch.float32, torch.float64, torch.float16]: | ||||
|                     if (torch.device(device).type == 'cpu' and target_dtype == torch.float16): | ||||
|                         continue | ||||
|                     input = input.to(input_dtype) | ||||
|                     target = target.to(target_dtype) | ||||
|                     result = torch.nn.functional.kl_div(input, target) | ||||
|                     self.assertEqual(result.item(), expected.item(), 0.001) | ||||
|  | ||||
|     def test_cosine_embedding_loss_no_reduce(self): | ||||
|         input1 = torch.randn(15, 10, requires_grad=True) | ||||
|         input2 = torch.randn(15, 10, requires_grad=True) | ||||
|  | ||||
| @ -309,6 +309,30 @@ class TestNumbaIntegration(common.TestCase): | ||||
|                 torch_ary += 42 | ||||
|                 self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary) + 42) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_NUMPY, "No numpy") | ||||
|     @unittest.skipIf(not TEST_CUDA, "No cuda") | ||||
|     @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") | ||||
|     def test_from_cuda_array_interface_inferred_strides(self): | ||||
|         """torch.as_tensor(numba_ary) should have correct inferred (contiguous) strides""" | ||||
|         # This could, in theory, be combined with test_from_cuda_array_interface but that test | ||||
|         # is overly strict: it checks that the exported protocols are exactly the same, which | ||||
|         # cannot handle differening exported protocol versions. | ||||
|         dtypes = [ | ||||
|             numpy.float64, | ||||
|             numpy.float32, | ||||
|             numpy.int64, | ||||
|             numpy.int32, | ||||
|             numpy.int16, | ||||
|             numpy.int8, | ||||
|             numpy.uint8, | ||||
|         ] | ||||
|         for dtype in dtypes: | ||||
|             numpy_ary = numpy.arange(6).reshape(2, 3).astype(dtype), | ||||
|             numba_ary = numba.cuda.to_device(numpy_ary) | ||||
|             self.assertTrue(numba_ary.is_c_contiguous()) | ||||
|             torch_ary = torch.as_tensor(numba_ary, device="cuda") | ||||
|             self.assertTrue(torch_ary.is_contiguous()) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_NUMPY, "No numpy") | ||||
|     @unittest.skipIf(not TEST_CUDA, "No cuda") | ||||
|     @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda") | ||||
|  | ||||
| @ -11,15 +11,13 @@ import torch.backends.mkldnn | ||||
| from common_utils import TestCase, run_tests | ||||
| from hypothesis import given | ||||
| from hypothesis import strategies as st | ||||
| from hypothesis_utils import no_deadline | ||||
| import hypothesis_utils as hu | ||||
| hu.assert_deadline_disabled() | ||||
| from functools import reduce | ||||
|  | ||||
|  | ||||
| class IntrinsicQATModuleTest(TestCase): | ||||
|     # NOTE: Tests in this class are decorated with no_deadline | ||||
|     # to prevent spurious failures due to cuda runtime initialization. | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(batch_size=st.integers(2, 4), | ||||
|            input_channels_per_group=st.sampled_from([2, 3, 4]), | ||||
|            height=st.integers(5, 10), | ||||
|  | ||||
| @ -42,7 +42,8 @@ from jit_utils import get_forward | ||||
|  | ||||
| from hypothesis import given | ||||
| from hypothesis import strategies as st | ||||
| from hypothesis_utils import no_deadline | ||||
| import hypothesis_utils as hu | ||||
| hu.assert_deadline_disabled() | ||||
| import io | ||||
| import copy | ||||
|  | ||||
| @ -50,7 +51,6 @@ import copy | ||||
|                      " Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs" | ||||
|                      " with instruction set support avx2 or newer.") | ||||
| class EagerModePostTrainingQuantTest(QuantizationTestCase): | ||||
|     @no_deadline | ||||
|     @given(qconfig=st.sampled_from((torch.quantization.default_qconfig, torch.quantization.default_per_channel_qconfig))) | ||||
|     def test_single_layer(self, qconfig): | ||||
|         r"""Quantize SingleLayerLinearModel which has one Linear module, make sure it is swapped | ||||
| @ -919,7 +919,6 @@ class GraphModePostTrainingQuantTest(QuantizationTestCase): | ||||
|  | ||||
| class FunctionalModuleTest(QuantizationTestCase): | ||||
|     # Histogram Observers are slow, so have no-deadline to ensure test doesn't time out | ||||
|     @no_deadline | ||||
|     @given(train_mode=st.booleans()) | ||||
|     def test_functional_module(self, train_mode): | ||||
|         model = ModelWithFunctionals() | ||||
| @ -1349,7 +1348,6 @@ class RecordHistogramObserverTest(QuantizationTestCase): | ||||
|         self.assertEqual(len(observer_dict['fc1.module.activation_post_process'].get_tensor_value()), 2 * len(self.calib_data)) | ||||
|         self.assertEqual(observer_dict['fc1.module.activation_post_process'].get_tensor_value()[0], model(self.calib_data[0][0])) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(qdtype=st.sampled_from((torch.qint8, torch.quint8)), | ||||
|            qscheme=st.sampled_from((torch.per_tensor_affine, torch.per_tensor_symmetric))) | ||||
|     def test_observer_scriptable(self, qdtype, qscheme): | ||||
| @ -1366,7 +1364,6 @@ class RecordHistogramObserverTest(QuantizationTestCase): | ||||
|         loaded = torch.jit.load(buf) | ||||
|         self.assertTrue(torch.equal(obs.get_tensor_value()[0], loaded.get_tensor_value()[0])) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(qdtype=st.sampled_from((torch.qint8, torch.quint8)), | ||||
|            qscheme=st.sampled_from((torch.per_tensor_affine, torch.per_tensor_symmetric)), | ||||
|            reduce_range=st.booleans()) | ||||
|  | ||||
| @ -10,7 +10,7 @@ from hypothesis import settings, HealthCheck | ||||
| from hypothesis import assume, given | ||||
| from hypothesis import strategies as st | ||||
| import hypothesis_utils as hu | ||||
| from hypothesis_utils import no_deadline | ||||
| hu.assert_deadline_disabled() | ||||
|  | ||||
| from common_utils import TEST_WITH_UBSAN, TestCase, run_tests, IS_PPC, IS_MACOS | ||||
| from common_quantized import _quantize, _dequantize, _calculate_dynamic_qparams, \ | ||||
| @ -145,7 +145,6 @@ class TestQuantizedOps(TestCase): | ||||
|                                  message="{} relu failed".format(name)) | ||||
|  | ||||
|     """Tests the correctness of the scalar addition.""" | ||||
|     @no_deadline | ||||
|     @given(A=hu.tensor(shapes=hu.array_shapes(1, 4, 1, 5), | ||||
|                        elements=st.floats(-1e6, 1e6, allow_nan=False), | ||||
|                        qparams=hu.qparams()), | ||||
| @ -506,7 +505,6 @@ class TestQuantizedOps(TestCase): | ||||
|         self.assertEqual(a_ref, a_hat.dequantize(), | ||||
|                          message="ops.quantized.max_pool2d results are off") | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4, | ||||
|                                               min_side=5, max_side=10), | ||||
|                        qparams=hu.qparams(dtypes=torch.quint8)), | ||||
| @ -556,7 +554,6 @@ class TestQuantizedOps(TestCase): | ||||
|                              message=error_message.format(name + '.zero_point', scale, | ||||
|                                                           qX_hat.q_zero_point())) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4, | ||||
|                                               min_side=5, max_side=10), | ||||
|                        qparams=hu.qparams(dtypes=torch.qint8)), | ||||
| @ -619,7 +616,6 @@ class TestQuantizedOps(TestCase): | ||||
|                              message=error_message.format(name + '.zero_point', scale, | ||||
|                              X_hat.q_zero_point())) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4, | ||||
|                                               min_side=1, max_side=10), | ||||
|                        qparams=hu.qparams(dtypes=torch.quint8)), | ||||
| @ -662,7 +658,6 @@ class TestQuantizedOps(TestCase): | ||||
|                                                           qX_hat.q_zero_point())) | ||||
|  | ||||
|     """Tests adaptive average pool operation on NHWC quantized tensors.""" | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4, | ||||
|                                               min_side=1, max_side=10), | ||||
|                        qparams=hu.qparams(dtypes=torch.qint8)), | ||||
| @ -708,7 +703,6 @@ class TestQuantizedOps(TestCase): | ||||
|                              message=error_message.format(name + '.zero_point', scale, | ||||
|                                                           X_hat.q_zero_point())) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4, | ||||
|                                               min_side=1, max_side=10), | ||||
|                        qparams=hu.qparams()), | ||||
| @ -733,7 +727,6 @@ class TestQuantizedOps(TestCase): | ||||
|         torch.testing.assert_allclose(quantized_out[0].dequantize(), unquantized_out[0]) | ||||
|         torch.testing.assert_allclose(quantized_out[1], unquantized_out[1]) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4, | ||||
|                                               min_side=1, max_side=10), | ||||
|                        qparams=hu.qparams()), | ||||
| @ -818,7 +811,6 @@ class TestQuantizedOps(TestCase): | ||||
|             cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale, | ||||
|                              zero_point=zero_point) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4, | ||||
|                                               min_side=5, max_side=10), | ||||
|                        qparams=hu.qparams()), | ||||
| @ -874,7 +866,6 @@ class TestQuantizedOps(TestCase): | ||||
|                                                           qX_hat.q_zero_point())) | ||||
|  | ||||
|     """Tests quantize concatenation (both fused and not).""" | ||||
|     @no_deadline | ||||
|     @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4, | ||||
|                                               min_side=1, max_side=10), | ||||
|                        qparams=hu.qparams()), | ||||
| @ -999,7 +990,6 @@ class TestQuantizedOps(TestCase): | ||||
|                      " with instruction set support avx2 or newer.") | ||||
| class TestDynamicQuantizedLinear(TestCase): | ||||
|     """Tests the correctness of the dynamic quantized linear and linear_relu op.""" | ||||
|     @no_deadline | ||||
|     @given( | ||||
|         batch_size=st.integers(1, 4), | ||||
|         input_channels=st.integers(16, 32), | ||||
| @ -1112,7 +1102,6 @@ class TestDynamicQuantizedLinear(TestCase): | ||||
|                          message="torch.ops.quantized.linear_dynamic (fbgemm) results are off") | ||||
|  | ||||
|     """Tests the correctness of the legacy dynamic quantized linear op.""" | ||||
|     @no_deadline | ||||
|     @given( | ||||
|         batch_size=st.integers(1, 4), | ||||
|         input_channels=st.integers(16, 32), | ||||
| @ -1189,7 +1178,6 @@ class TestDynamicQuantizedLinear(TestCase): | ||||
|  | ||||
| class TestQuantizedLinear(unittest.TestCase): | ||||
|     """Tests the correctness of the quantized linear and linear_relu op.""" | ||||
|     @no_deadline | ||||
|     @given(batch_size=st.integers(1, 4), | ||||
|            input_channels=st.integers(16, 32), | ||||
|            output_channels=st.integers(4, 8), | ||||
|  | ||||
| @ -13,7 +13,8 @@ from common_quantized import _calculate_dynamic_qparams, override_quantized_engi | ||||
| from common_utils import run_tests, IS_PPC, TEST_WITH_UBSAN | ||||
| from hypothesis import assume, given | ||||
| from hypothesis import strategies as st | ||||
| from hypothesis_utils import no_deadline | ||||
| import hypothesis_utils as hu | ||||
| hu.assert_deadline_disabled() | ||||
|  | ||||
| import io | ||||
| import numpy as np | ||||
| @ -127,7 +128,6 @@ class FunctionalAPITest(QuantizationTestCase): | ||||
|  | ||||
|  | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(batch_size=st.integers(1, 3), | ||||
|            in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]), | ||||
|            H=st.integers(4, 16), | ||||
| @ -181,7 +181,6 @@ class FunctionalAPITest(QuantizationTestCase): | ||||
|                 W_scale, W_zero_point, Y_scale, Y_zero_point, use_bias, | ||||
|                 use_channelwise) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(batch_size=st.integers(1, 3), | ||||
|            in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]), | ||||
|            D=st.integers(4, 8), | ||||
| @ -239,7 +238,6 @@ class FunctionalAPITest(QuantizationTestCase): | ||||
|  | ||||
|  | ||||
| class DynamicModuleAPITest(QuantizationTestCase): | ||||
|     @no_deadline | ||||
|     @unittest.skipUnless('fbgemm' in torch.backends.quantized.supported_engines, | ||||
|                          " Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs" | ||||
|                          " with instruction set support avx2 or newer.") | ||||
| @ -357,7 +355,6 @@ class ModuleAPITest(QuantizationTestCase): | ||||
|                          message="ReLU6 module API failed") | ||||
|  | ||||
|  | ||||
|     @no_deadline | ||||
|     @given( | ||||
|         batch_size=st.integers(1, 5), | ||||
|         in_features=st.integers(16, 32), | ||||
| @ -421,7 +418,6 @@ class ModuleAPITest(QuantizationTestCase): | ||||
|             self.assertEqual(Z_ref, Z_q) | ||||
|  | ||||
|             # Test serialization of quantized Linear Module using state_dict | ||||
|  | ||||
|             model_dict = qlinear.state_dict() | ||||
|             self.assertEqual(model_dict['_packed_params.weight'], W_q) | ||||
|             if use_bias: | ||||
| @ -647,7 +643,6 @@ class ModuleAPITest(QuantizationTestCase): | ||||
|         # Smoke test extra_repr | ||||
|         self.assertTrue(module_name in str(converted_qconv_module)) | ||||
|  | ||||
|     @no_deadline | ||||
|     @given(batch_size=st.integers(1, 3), | ||||
|            in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]), | ||||
|            H=st.integers(4, 16), | ||||
|  | ||||
| @ -763,6 +763,45 @@ class _TestTorchMixin(object): | ||||
|             res = torch.where(a > 0) | ||||
|             self.assertEqual(1, len(res)) | ||||
|  | ||||
|     def test_where_tensor(self): | ||||
|         def rand_tensor(size, dtype, device): | ||||
|             if dtype.is_floating_point: | ||||
|                 return torch.rand(size=size, dtype=dtype, device=device) | ||||
|             elif dtype == torch.uint8: | ||||
|                 return torch.randint(1, 5, size=size, dtype=dtype, device=device) | ||||
|             elif dtype == torch.bool: | ||||
|                 return torch.randint(0, 1, size=size, dtype=dtype, device=device).bool() | ||||
|             else: | ||||
|                 return torch.randint(-5, 5, size=size, dtype=dtype, device=device) | ||||
|  | ||||
|         def get_tensor(size, dtype, device, contiguous): | ||||
|             if not contiguous and len(size) < 2: | ||||
|                 raise RuntimeError("Unable to generate non contiguous tensor with size < 2") | ||||
|             t = rand_tensor(size, dtype, device) | ||||
|             if contiguous: | ||||
|                 return t | ||||
|             else: | ||||
|                 return t.transpose(0, 1) | ||||
|  | ||||
|         height = 5 | ||||
|         width = 5 | ||||
|         for device in torch.testing.get_all_device_types(): | ||||
|             for dt1 in torch.testing.get_all_math_dtypes(device): | ||||
|                 for dt2 in torch.testing.get_all_math_dtypes(device): | ||||
|                     for contiguous in [True, False]: | ||||
|                         x1 = get_tensor((height, width), dt1, device, contiguous) | ||||
|                         x2 = get_tensor((height, width), dt2, device, contiguous) | ||||
|                         if dt1 != dt2: | ||||
|                             self.assertRaisesRegex(RuntimeError, "expected scalar type", lambda: torch.where(x1 == 1, x1, x2)) | ||||
|                         else: | ||||
|                             if x1.is_floating_point(): | ||||
|                                 condition = (x1 < 0.5) | ||||
|                             else: | ||||
|                                 condition = (x1 == 1) | ||||
|                             expected = condition.to(x1.dtype) * x1 + (~condition).to(x2.dtype) * x2 | ||||
|                             result = torch.where(condition, x1, x2) | ||||
|                             self.assertEqual(expected, result) | ||||
|  | ||||
|     def test_all_any_with_dim(self): | ||||
|         def test(x): | ||||
|             r1 = x.prod(dim=0, keepdim=False).byte() | ||||
| @ -1772,6 +1811,13 @@ class _TestTorchMixin(object): | ||||
|         x = torch.tensor(2., requires_grad=True) | ||||
|         self.assertRaises(Exception, lambda: y.addcmul(y, y, value=x)) | ||||
|  | ||||
|     # FIXME: get rid of this once we have actual ops using optional floats | ||||
|     def test_optional_floats(self): | ||||
|         x = torch.randn(()) | ||||
|         self.assertEqual(torch._test_optional_float(x), torch.empty((0,))) | ||||
|         self.assertEqual(torch._test_optional_float(x, scale=None), torch.empty((0,))) | ||||
|         self.assertEqual(torch._test_optional_float(x, scale=2.5), torch.full((), 2.5)) | ||||
|  | ||||
|     def test_copy_broadcast(self): | ||||
|         torch.zeros(5, 6).copy_(torch.zeros(6)) | ||||
|         self.assertRaises(RuntimeError, lambda: torch.zeros(5, 6).copy_(torch.zeros(30))) | ||||
| @ -13661,23 +13707,69 @@ class TestTorchDeviceType(TestCase): | ||||
|         result = torch.cat(concat_list) | ||||
|         self.assertEqual(result.size(0), SIZE1 + SIZE2) | ||||
|  | ||||
| # NOTE [Linspace+Logspace precision override] | ||||
| # Our Linspace and logspace torch.half CUDA kernels are not very precise. | ||||
| # Since linspace/logspace are deterministic, we can compute an expected | ||||
| # amount of error (by testing without a precision override), adding a tiny | ||||
| # amount (EPS) to that, and using that value as the override. | ||||
| LINSPACE_LOGSPACE_EXTRA_EPS = 1e-5 | ||||
|  | ||||
| # Tests that compare a device's computation with the (gold-standard) CPU's. | ||||
| class TestDevicePrecision(TestCase): | ||||
|     def test_linspace(self, device): | ||||
|         a = torch.linspace(0, 10, 10, device=device) | ||||
|         b = torch.linspace(0, 10, 10) | ||||
|  | ||||
|     # The implementation of linspace+logspace goes through a different path | ||||
|     # when the steps arg is equal to 0 or 1. For other values of `steps` | ||||
|     # they call specialized linspace (or logspace) kernels. | ||||
|     LINSPACE_LOGSPACE_SPECIAL_STEPS = [0, 1] | ||||
|  | ||||
|     def _test_linspace(self, device, dtype, steps): | ||||
|         a = torch.linspace(0, 10, steps=steps, dtype=dtype, device=device) | ||||
|         b = torch.linspace(0, 10, steps=steps) | ||||
|         self.assertEqual(a, b) | ||||
|  | ||||
|     @dtypes(torch.double) | ||||
|     # See NOTE [Linspace+Logspace precision override] | ||||
|     @precisionOverride({torch.half: 0.0039 + LINSPACE_LOGSPACE_EXTRA_EPS}) | ||||
|     @dtypesIfCUDA(torch.half, torch.float, torch.double) | ||||
|     @dtypes(torch.float, torch.double) | ||||
|     def test_linspace(self, device, dtype): | ||||
|         self._test_linspace(device, dtype, steps=10) | ||||
|  | ||||
|     @dtypesIfCUDA(torch.half, torch.float, torch.double) | ||||
|     @dtypes(torch.float, torch.double) | ||||
|     def test_linspace_special_steps(self, device, dtype): | ||||
|         for steps in self.LINSPACE_LOGSPACE_SPECIAL_STEPS: | ||||
|             self._test_linspace(device, dtype, steps=steps) | ||||
|  | ||||
|     def _test_logspace(self, device, dtype, steps): | ||||
|         a = torch.logspace(1, 1.1, steps=steps, dtype=dtype, device=device) | ||||
|         b = torch.logspace(1, 1.1, steps=steps) | ||||
|         self.assertEqual(a, b) | ||||
|  | ||||
|     def _test_logspace_base2(self, device, dtype, steps): | ||||
|         a = torch.logspace(1, 1.1, steps=steps, base=2, dtype=dtype, device=device) | ||||
|         b = torch.logspace(1, 1.1, steps=steps, base=2) | ||||
|         self.assertEqual(a, b) | ||||
|  | ||||
|     # See NOTE [Linspace+Logspace precision override] | ||||
|     @precisionOverride({torch.half: 0.0157 + LINSPACE_LOGSPACE_EXTRA_EPS}) | ||||
|     @dtypesIfCUDA(torch.half, torch.float, torch.double) | ||||
|     @dtypes(torch.float, torch.double) | ||||
|     def test_logspace(self, device, dtype): | ||||
|         a = torch.logspace(1, 10, 10, dtype=dtype, device=device) | ||||
|         b = torch.logspace(1, 10, 10, dtype=dtype, device='cpu') | ||||
|         self.assertEqual(a, b) | ||||
|         self._test_logspace(device, dtype, steps=10) | ||||
|  | ||||
|         # Check non-default base=2 | ||||
|         a = torch.logspace(1, 10, 10, 2, dtype=dtype, device=device) | ||||
|         b = torch.logspace(1, 10, 10, 2, dtype=dtype, device='cpu') | ||||
|         self.assertEqual(a, b) | ||||
|     # See NOTE [Linspace+Logspace precision override] | ||||
|     @precisionOverride({torch.half: 0.00201 + LINSPACE_LOGSPACE_EXTRA_EPS}) | ||||
|     @dtypesIfCUDA(torch.half, torch.float, torch.double) | ||||
|     @dtypes(torch.float, torch.double) | ||||
|     def test_logspace_base2(self, device, dtype): | ||||
|         self._test_logspace_base2(device, dtype, steps=10) | ||||
|  | ||||
|     @dtypesIfCUDA(torch.half, torch.float, torch.double) | ||||
|     @dtypes(torch.float, torch.double) | ||||
|     def test_logspace_special_steps(self, device, dtype): | ||||
|         for steps in self.LINSPACE_LOGSPACE_SPECIAL_STEPS: | ||||
|             self._test_logspace(device, dtype, steps=steps) | ||||
|             self._test_logspace_base2(device, dtype, steps=steps) | ||||
|  | ||||
|     # Note: ROCm fails when using float tensors | ||||
|     @dtypes(torch.double) | ||||
|  | ||||
							
								
								
									
										2
									
								
								third_party/fbgemm
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
							
						
						
									
										2
									
								
								third_party/fbgemm
									
									
									
									
										vendored
									
									
								
							 Submodule third_party/fbgemm updated: 6aaaa4754f...399ea148f1
									
								
							| @ -328,6 +328,7 @@ def create_python_bindings(python_functions, has_self, is_module=False): | ||||
|         'c10::optional<Scalar>': 'scalarOptional', | ||||
|         'c10::optional<int64_t>': 'toInt64Optional', | ||||
|         'c10::optional<bool>': 'toBoolOptional', | ||||
|         'c10::optional<double>': 'toDoubleOptional', | ||||
|         'IntArrayRef': 'intlist', | ||||
|         'int64_t': 'toInt64', | ||||
|         'bool': 'toBool', | ||||
|  | ||||
| @ -63,6 +63,7 @@ TYPE_MAP = { | ||||
|     'int64_t': 'int', | ||||
|     'int64_t?': 'int?', | ||||
|     'double': 'float', | ||||
|     'double?': 'float?', | ||||
|     'bool': 'bool', | ||||
|     'bool?': 'bool?', | ||||
|     'Generator': 'Generator?', | ||||
| @ -115,6 +116,7 @@ FROM_IVALUE = { | ||||
|     'bool': '{}.toBool()', | ||||
|     'bool?': '{}.toOptional<bool>()', | ||||
|     'double': '{}.toDouble()', | ||||
|     'double?': '{}.toOptional<double>()', | ||||
|     'int64_t': '{}.toInt()', | ||||
|     'int64_t?': '{}.toOptional<int64_t>()', | ||||
|     'std::string': '{}.toStringRef()', | ||||
|  | ||||
| @ -3751,25 +3751,37 @@ add_docstr(torch.nonzero, | ||||
|            r""" | ||||
| nonzero(input, *, out=None, as_tuple=False) -> LongTensor or tuple of LongTensors | ||||
|  | ||||
| **When** :attr:`as_tuple` **is false or unspecified:** | ||||
| .. note:: | ||||
|     :func:`torch.nonzero(..., as_tuple=False) <torch.nonzero>` (default) returns a | ||||
|     2-D tensor where each row is the index for a nonzero value. | ||||
|  | ||||
|     :func:`torch.nonzero(..., as_tuple=True) <torch.nonzero>` returns a tuple of 1-D | ||||
|     index tensors, allowing for advanced indexing, so ``x[x.nonzero(as_tuple=True)]`` | ||||
|     gives all nonzero values of tensor ``x``. Of the returned tuple, each index tensor | ||||
|     contains nonzero indices for a certain dimension. | ||||
|  | ||||
|     See below for more details on the two behaviors. | ||||
|  | ||||
|  | ||||
| **When** :attr:`as_tuple` **is ``False`` (default)**: | ||||
|  | ||||
| Returns a tensor containing the indices of all non-zero elements of | ||||
| :attr:`input`.  Each row in the result contains the indices of a non-zero | ||||
| element in :attr:`input`. The result is sorted lexicographically, with | ||||
| the last index changing the fastest (C-style). | ||||
|  | ||||
| If :attr:`input` has `n` dimensions, then the resulting indices tensor | ||||
| If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor | ||||
| :attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of | ||||
| non-zero elements in the :attr:`input` tensor. | ||||
|  | ||||
| **When** :attr:`as_tuple` **is true:** | ||||
| **When** :attr:`as_tuple` **is ``True``**: | ||||
|  | ||||
| Returns a tuple of 1-D tensors, one for each dimension in :attr:`input`, | ||||
| each containing the indices (in that dimension) of all non-zero elements of | ||||
| :attr:`input`. | ||||
|  | ||||
| If :attr:`input` has `n` dimensions, then the resulting tuple contains `n` tensors | ||||
| of size `z`, where `z` is the total number of | ||||
| If :attr:`input` has :math:`n` dimensions, then the resulting tuple contains :math:`n` | ||||
| tensors of size :math:`z`, where :math:`z` is the total number of | ||||
| non-zero elements in the :attr:`input` tensor. | ||||
|  | ||||
| As a special case, when :attr:`input` has zero dimensions and a nonzero scalar | ||||
| @ -3780,8 +3792,8 @@ Args: | ||||
|     out (LongTensor, optional): the output tensor containing indices | ||||
|  | ||||
| Returns: | ||||
|     LongTensor or tuple of LongTensor: If :attr:`as_tuple` is false, the output | ||||
|     tensor containing indices. If :attr:`as_tuple` is true, one 1-D tensor for | ||||
|     LongTensor or tuple of LongTensor: If :attr:`as_tuple` is ``False``, the output | ||||
|     tensor containing indices. If :attr:`as_tuple` is ``True``, one 1-D tensor for | ||||
|     each dimension, containing the indices of each nonzero element along that | ||||
|     dimension. | ||||
|  | ||||
| @ -5199,6 +5211,9 @@ i.e., if the last two dimensions of :attr:`input` are ``m`` and ``n``, then the | ||||
| If :attr:`compute_uv` is ``False``, the returned `U` and `V` matrices will be zero matrices | ||||
| of shape :math:`(m \times m)` and :math:`(n \times n)` respectively. :attr:`some` will be ignored here. | ||||
|  | ||||
| .. note:: The singular values are returned in descending order. If :attr:`input` is a batch of matrices, | ||||
|           then the singular values of each matrix in the batch are returned in descending order. | ||||
|  | ||||
| .. note:: The implementation of SVD on CPU uses the LAPACK routine `?gesdd` (a divide-and-conquer | ||||
|           algorithm) instead of `?gesvd` for speed. Analogously, the SVD on GPU uses the MAGMA routine | ||||
|           `gesdd` as well. | ||||
| @ -5279,6 +5294,9 @@ only the upper triangular portion is used by default. | ||||
|  | ||||
| If :attr:`upper` is ``False``, then lower triangular portion is used. | ||||
|  | ||||
| .. note:: The eigenvalues are returned in ascending order. If :attr:`input` is a batch of matrices, | ||||
|           then the eigenvalues of each matrix in the batch are returned in ascending order. | ||||
|  | ||||
| .. note:: Irrespective of the original strides, the returned matrix `V` will | ||||
|           be transposed, i.e. with strides `V.contiguous().transpose(-1, -2).stride()`. | ||||
|  | ||||
| @ -5782,7 +5800,7 @@ The upper triangular part of the matrix is defined as the elements on and | ||||
| above the diagonal. | ||||
|  | ||||
| The argument :attr:`diagonal` controls which diagonal to consider. If | ||||
| :attr:`diagonal` = 0, all elements on and below the main diagonal are | ||||
| :attr:`diagonal` = 0, all elements on and above the main diagonal are | ||||
| retained. A positive value excludes just as many diagonals above the main | ||||
| diagonal, and similarly a negative value includes just as many diagonals below | ||||
| the main diagonal. The main diagonal is the set of indices | ||||
|  | ||||
| @ -22,6 +22,8 @@ namespace datasets { | ||||
| template <typename ExampleType_, typename ChunkType_ = std::vector<ExampleType_>> | ||||
| class ChunkDataReader { | ||||
|  public: | ||||
|   virtual ~ChunkDataReader() = default; | ||||
|  | ||||
|   using ChunkType = ChunkType_; | ||||
|   using ExampleType = ExampleType_; | ||||
|  | ||||
|  | ||||
| @ -47,7 +47,7 @@ class Cloneable : public virtual Module { | ||||
|         "parameters as the original module after calling reset(). " | ||||
|         "Are you sure you called register_parameter() inside reset() " | ||||
|         "and not the constructor?"); | ||||
|     for (const auto& parameter : parameters_) { | ||||
|     for (const auto& parameter : named_parameters(/*recurse=*/false)) { | ||||
|       auto& tensor = *parameter; | ||||
|       auto data = device && tensor.device() != *device ? | ||||
|           tensor.to(*device) : autograd::Variable(tensor).clone(); | ||||
| @ -59,7 +59,7 @@ class Cloneable : public virtual Module { | ||||
|         "buffers as the original module after calling reset(). " | ||||
|         "Are you sure you called register_buffer() inside reset() " | ||||
|         "and not the constructor?"); | ||||
|     for (const auto& buffer : buffers_) { | ||||
|     for (const auto& buffer : named_buffers(/*recurse=*/false)) { | ||||
|       auto& tensor = *buffer; | ||||
|       auto data = device && tensor.device() != *device ? | ||||
|           tensor.to(*device) : autograd::Variable(tensor).clone(); | ||||
|  | ||||
| @ -648,11 +648,11 @@ void Module::to_impl(Ts&&... ts) { | ||||
|     child.value()->to(ts...); | ||||
|   } | ||||
|   // Then move every parameter to the new dtype/device. | ||||
|   for (auto& parameter : parameters_) { | ||||
|   for (auto& parameter : named_parameters(/*recurse=*/false)) { | ||||
|     parameter->set_data(autograd::Variable(*parameter).to(ts...)); | ||||
|   } | ||||
|   // Then move every buffer to the new dtype/device. | ||||
|   for (auto& buffer : buffers_) { | ||||
|   for (auto& buffer : named_buffers(/*recurse=*/false)) { | ||||
|     buffer->set_data(autograd::Variable(*buffer).to(ts...)); | ||||
|   } | ||||
| } | ||||
|  | ||||
| @ -9,8 +9,6 @@ | ||||
|  | ||||
| #include <cstdint> | ||||
|  | ||||
| namespace F = torch::nn::functional; | ||||
|  | ||||
| namespace torch { | ||||
| namespace nn { | ||||
|  | ||||
| @ -178,7 +176,7 @@ class BatchNormImplBase : public NormImplBase<D, Derived, BatchNormOptions> { | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     return F::detail::batch_norm( | ||||
|     return torch::nn::functional::detail::batch_norm( | ||||
|         input, | ||||
|         this->running_mean, | ||||
|         this->running_var, | ||||
|  | ||||
| @ -17,9 +17,9 @@ namespace nn { | ||||
|  | ||||
| /// Base class for all (dimension-specialized) convolution modules. | ||||
| template <size_t D, typename Derived> | ||||
| class ConvImpl : public torch::nn::Cloneable<Derived> { | ||||
| class ConvNdImpl : public torch::nn::Cloneable<Derived> { | ||||
|  public: | ||||
|   explicit ConvImpl(ConvOptions<D> options_) : options(std::move(options_)) { | ||||
|   explicit ConvNdImpl(detail::ConvNdOptions<D> options_) : options(std::move(options_)) { | ||||
|     reset(); | ||||
|   } | ||||
|  | ||||
| @ -98,7 +98,7 @@ class ConvImpl : public torch::nn::Cloneable<Derived> { | ||||
|   } | ||||
|  | ||||
|   /// The options with which this `Module` was constructed. | ||||
|   ConvOptions<D> options; | ||||
|   detail::ConvNdOptions<D> options; | ||||
|  | ||||
|   /// The learned kernel (or "weight"). | ||||
|   Tensor weight; | ||||
| @ -112,15 +112,15 @@ class ConvImpl : public torch::nn::Cloneable<Derived> { | ||||
| /// Applies convolution over a 1-D input. | ||||
| /// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv1d to learn about | ||||
| /// the exact behavior of this module. | ||||
| class TORCH_API Conv1dImpl : public ConvImpl<1, Conv1dImpl> { | ||||
| class TORCH_API Conv1dImpl : public ConvNdImpl<1, Conv1dImpl> { | ||||
|  public: | ||||
|   Conv1dImpl( | ||||
|       int64_t input_channels, | ||||
|       int64_t output_channels, | ||||
|       ExpandingArray<1> kernel_size) | ||||
|       : Conv1dImpl(ConvOptions<1>(input_channels, output_channels, kernel_size)) { | ||||
|       : Conv1dImpl(Conv1dOptions(input_channels, output_channels, kernel_size)) { | ||||
|   } | ||||
|   explicit Conv1dImpl(ConvOptions<1> options_); | ||||
|   explicit Conv1dImpl(Conv1dOptions options_); | ||||
|   Tensor forward(const Tensor& input); | ||||
| }; | ||||
|  | ||||
| @ -135,15 +135,15 @@ TORCH_MODULE(Conv1d); | ||||
| /// Applies convolution over a 2-D input. | ||||
| /// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d to learn about | ||||
| /// the exact behavior of this module. | ||||
| class TORCH_API Conv2dImpl : public ConvImpl<2, Conv2dImpl> { | ||||
| class TORCH_API Conv2dImpl : public ConvNdImpl<2, Conv2dImpl> { | ||||
|  public: | ||||
|   Conv2dImpl( | ||||
|       int64_t input_channels, | ||||
|       int64_t output_channels, | ||||
|       ExpandingArray<2> kernel_size) | ||||
|       : Conv2dImpl(ConvOptions<2>(input_channels, output_channels, kernel_size)) { | ||||
|       : Conv2dImpl(Conv2dOptions(input_channels, output_channels, kernel_size)) { | ||||
|   } | ||||
|   explicit Conv2dImpl(ConvOptions<2> options_); | ||||
|   explicit Conv2dImpl(Conv2dOptions options_); | ||||
|   Tensor forward(const Tensor& input); | ||||
| }; | ||||
|  | ||||
| @ -158,15 +158,15 @@ TORCH_MODULE(Conv2d); | ||||
| /// Applies convolution over a 3-D input. | ||||
| /// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv3d to learn about | ||||
| /// the exact behavior of this module. | ||||
| class TORCH_API Conv3dImpl : public ConvImpl<3, Conv3dImpl> { | ||||
| class TORCH_API Conv3dImpl : public ConvNdImpl<3, Conv3dImpl> { | ||||
|  public: | ||||
|   Conv3dImpl( | ||||
|       int64_t input_channels, | ||||
|       int64_t output_channels, | ||||
|       ExpandingArray<3> kernel_size) | ||||
|       : Conv3dImpl(ConvOptions<3>(input_channels, output_channels, kernel_size)) { | ||||
|       : Conv3dImpl(Conv3dOptions(input_channels, output_channels, kernel_size)) { | ||||
|   } | ||||
|   explicit Conv3dImpl(ConvOptions<3> options_); | ||||
|   explicit Conv3dImpl(Conv3dOptions options_); | ||||
|   Tensor forward(const Tensor& input); | ||||
| }; | ||||
|  | ||||
| @ -180,9 +180,9 @@ TORCH_MODULE(Conv3d); | ||||
|  | ||||
| /// Base class for all (dimension-specialized) convolution transpose modules. | ||||
| template <size_t D, typename Derived> | ||||
| class ConvTransposeImpl : public ConvImpl<D, Derived> { | ||||
| class ConvTransposeNdImpl : public ConvNdImpl<D, Derived> { | ||||
|  public: | ||||
|   using torch::nn::ConvImpl<D, Derived>::ConvImpl; | ||||
|   using torch::nn::ConvNdImpl<D, Derived>::ConvNdImpl; | ||||
|  | ||||
|   /// Pretty prints the `ConvTranspose{1,2,3}d` module into the given `stream`. | ||||
|   void pretty_print(std::ostream& stream) const override { | ||||
| @ -224,15 +224,15 @@ class ConvTransposeImpl : public ConvImpl<D, Derived> { | ||||
| /// Applies the ConvTranspose1d function. | ||||
| /// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose1d to | ||||
| /// learn about the exact behavior of this module. | ||||
| class TORCH_API ConvTranspose1dImpl : public ConvTransposeImpl<1, ConvTranspose1dImpl> { | ||||
| class TORCH_API ConvTranspose1dImpl : public ConvTransposeNdImpl<1, ConvTranspose1dImpl> { | ||||
|  public: | ||||
|   ConvTranspose1dImpl( | ||||
|       int64_t input_channels, | ||||
|       int64_t output_channels, | ||||
|       ExpandingArray<1> kernel_size) | ||||
|       : ConvTranspose1dImpl(ConvTransposeOptions<1>(input_channels, output_channels, kernel_size)) { | ||||
|       : ConvTranspose1dImpl(ConvTranspose1dOptions(input_channels, output_channels, kernel_size)) { | ||||
|   } | ||||
|   explicit ConvTranspose1dImpl(ConvTransposeOptions<1> options_); | ||||
|   explicit ConvTranspose1dImpl(ConvTranspose1dOptions options_); | ||||
|   Tensor forward(const Tensor& input, | ||||
|                  const c10::optional<at::IntArrayRef>& output_size = c10::nullopt); | ||||
| }; | ||||
| @ -244,15 +244,15 @@ TORCH_MODULE(ConvTranspose1d); | ||||
| /// Applies the ConvTranspose2d function. | ||||
| /// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose2d to | ||||
| /// learn about the exact behavior of this module. | ||||
| class TORCH_API ConvTranspose2dImpl : public ConvTransposeImpl<2, ConvTranspose2dImpl> { | ||||
| class TORCH_API ConvTranspose2dImpl : public ConvTransposeNdImpl<2, ConvTranspose2dImpl> { | ||||
|  public: | ||||
|   ConvTranspose2dImpl( | ||||
|       int64_t input_channels, | ||||
|       int64_t output_channels, | ||||
|       ExpandingArray<2> kernel_size) | ||||
|       : ConvTranspose2dImpl(ConvTransposeOptions<2>(input_channels, output_channels, kernel_size)) { | ||||
|       : ConvTranspose2dImpl(ConvTranspose2dOptions(input_channels, output_channels, kernel_size)) { | ||||
|   } | ||||
|   explicit ConvTranspose2dImpl(ConvTransposeOptions<2> options_); | ||||
|   explicit ConvTranspose2dImpl(ConvTranspose2dOptions options_); | ||||
|   Tensor forward(const Tensor& input, | ||||
|                  const c10::optional<at::IntArrayRef>& output_size = c10::nullopt); | ||||
| }; | ||||
| @ -264,15 +264,15 @@ TORCH_MODULE(ConvTranspose2d); | ||||
| /// Applies the ConvTranspose3d function. | ||||
| /// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose3d to | ||||
| /// learn about the exact behavior of this module. | ||||
| class TORCH_API ConvTranspose3dImpl : public ConvTransposeImpl<3, ConvTranspose3dImpl> { | ||||
| class TORCH_API ConvTranspose3dImpl : public ConvTransposeNdImpl<3, ConvTranspose3dImpl> { | ||||
|  public: | ||||
|   ConvTranspose3dImpl( | ||||
|       int64_t input_channels, | ||||
|       int64_t output_channels, | ||||
|       ExpandingArray<3> kernel_size) | ||||
|       : ConvTranspose3dImpl(ConvTransposeOptions<3>(input_channels, output_channels, kernel_size)) { | ||||
|       : ConvTranspose3dImpl(ConvTranspose3dOptions(input_channels, output_channels, kernel_size)) { | ||||
|   } | ||||
|   explicit ConvTranspose3dImpl(ConvTransposeOptions<3> options_); | ||||
|   explicit ConvTranspose3dImpl(ConvTranspose3dOptions options_); | ||||
|   Tensor forward(const Tensor& input, | ||||
|                  const c10::optional<at::IntArrayRef>& output_size = c10::nullopt); | ||||
| }; | ||||
|  | ||||
| @ -14,7 +14,7 @@ class InstanceNormImpl : public torch::nn::NormImplBase<D, Derived, InstanceNorm | ||||
|  | ||||
|   Tensor forward(const Tensor& input) { | ||||
|     this->_check_input_dim(input); | ||||
|     return F::detail::instance_norm( | ||||
|     return torch::nn::functional::detail::instance_norm( | ||||
|       input, this->running_mean, this->running_var, this->weight, this->bias, | ||||
|       this->is_training() || !this->options.track_running_stats(), this->options.momentum(), this->options.eps()); | ||||
|   } | ||||
|  | ||||
| @ -9,12 +9,14 @@ | ||||
| namespace torch { | ||||
| namespace nn { | ||||
|  | ||||
| /// Options for a `D`-dimensional convolution module. | ||||
| template <size_t D> | ||||
| struct ConvOptions { | ||||
|   typedef c10::variant<enumtype::kZeros, enumtype::kCircular> padding_mode_t; | ||||
| namespace detail { | ||||
|  | ||||
|   ConvOptions( | ||||
| typedef c10::variant<enumtype::kZeros, enumtype::kCircular> conv_padding_mode_t; | ||||
|  | ||||
| /// Options for a `D`-dimensional convolution or convolution transpose module. | ||||
| template <size_t D> | ||||
| struct ConvNdOptions { | ||||
|   ConvNdOptions( | ||||
|       int64_t in_channels, | ||||
|       int64_t out_channels, | ||||
|       ExpandingArray<D> kernel_size) : | ||||
| @ -73,6 +75,67 @@ struct ConvOptions { | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(bool, bias) = true; | ||||
|  | ||||
|   /// Accepted values: `zeros` and `circular`. Default: `zeros`. | ||||
|   TORCH_ARG(conv_padding_mode_t, padding_mode) = torch::kZeros; | ||||
| }; | ||||
|  | ||||
| } // namespace detail | ||||
|  | ||||
| // ============================================================================ | ||||
|  | ||||
| /// Options for a `D`-dimensional convolution module. | ||||
| template <size_t D> | ||||
| struct ConvOptions { | ||||
|   using padding_mode_t = detail::conv_padding_mode_t; | ||||
|  | ||||
|   ConvOptions( | ||||
|       int64_t in_channels, | ||||
|       int64_t out_channels, | ||||
|       ExpandingArray<D> kernel_size) : | ||||
|                 in_channels_(in_channels), | ||||
|                 out_channels_(out_channels), | ||||
|                 kernel_size_(std::move(kernel_size)) {} | ||||
|  | ||||
|   /// The number of channels the input volumes will have. | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(int64_t, in_channels); | ||||
|  | ||||
|   /// The number of output channels the convolution should produce. | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(int64_t, out_channels); | ||||
|  | ||||
|   /// The kernel size to use. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, kernel_size); | ||||
|  | ||||
|   /// The stride of the convolution. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, stride) = 1; | ||||
|  | ||||
|   /// The padding to add to the input volumes. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, padding) = 0; | ||||
|  | ||||
|   /// The kernel dilation. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, dilation) = 1; | ||||
|  | ||||
|   /// The number of convolution groups. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(int64_t, groups) = 1; | ||||
|  | ||||
|   /// Whether to add a bias after individual applications of the kernel. | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(bool, bias) = true; | ||||
|  | ||||
|   /// Accepted values: `zeros` and `circular`. Default: `zeros`. | ||||
|   TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros; | ||||
| }; | ||||
| @ -129,8 +192,67 @@ using Conv3dFuncOptions = ConvFuncOptions<3>; | ||||
|  | ||||
| // ============================================================================ | ||||
|  | ||||
| template<size_t D> | ||||
| using ConvTransposeOptions = ConvOptions<D>; | ||||
| template <size_t D> | ||||
| struct ConvTransposeOptions { | ||||
|   using padding_mode_t = detail::conv_padding_mode_t; | ||||
|  | ||||
|   ConvTransposeOptions( | ||||
|       int64_t in_channels, | ||||
|       int64_t out_channels, | ||||
|       ExpandingArray<D> kernel_size) : | ||||
|                 in_channels_(in_channels), | ||||
|                 out_channels_(out_channels), | ||||
|                 kernel_size_(std::move(kernel_size)) {} | ||||
|  | ||||
|   /// The number of channels the input volumes will have. | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(int64_t, in_channels); | ||||
|  | ||||
|   /// The number of output channels the convolution should produce. | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(int64_t, out_channels); | ||||
|  | ||||
|   /// The kernel size to use. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, kernel_size); | ||||
|  | ||||
|   /// The stride of the convolution. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, stride) = 1; | ||||
|  | ||||
|   /// The padding to add to the input volumes. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, padding) = 0; | ||||
|  | ||||
|   /// For transpose convolutions, the padding to add to output volumes. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, output_padding) = 0; | ||||
|  | ||||
|   /// The number of convolution groups. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(int64_t, groups) = 1; | ||||
|  | ||||
|   /// Whether to add a bias after individual applications of the kernel. | ||||
|   /// Changing this parameter after construction __has no effect__. | ||||
|   TORCH_ARG(bool, bias) = true; | ||||
|  | ||||
|   /// The kernel dilation. | ||||
|   /// For a `D`-dim convolution, must be a single number or a list of `D` | ||||
|   /// numbers. | ||||
|   /// This parameter __can__ be changed after construction. | ||||
|   TORCH_ARG(ExpandingArray<D>, dilation) = 1; | ||||
|  | ||||
|   /// Accepted values: `zeros` and `circular`. Default: `zeros`. | ||||
|   TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros; | ||||
| }; | ||||
|  | ||||
| /// `ConvTransposeOptions` specialized for 1-D convolution. | ||||
| using ConvTranspose1dOptions = ConvTransposeOptions<1>; | ||||
|  | ||||
| @ -100,7 +100,7 @@ void replicate_grad_edges( | ||||
|     const std::vector<std::shared_ptr<ModuleType>>& replicas, | ||||
|     const std::vector<Device>& devices) { | ||||
|  | ||||
|   for (auto& parameter : module->parameters_) { | ||||
|   for (auto& parameter : module->named_parameters(/*recurse=*/false)) { | ||||
|     auto grad_fn = std::make_shared<ReduceAdd>((*parameter).device()); | ||||
|     grad_fn->set_next_edges(autograd::collect_next_edges(*parameter)); | ||||
|  | ||||
| @ -109,7 +109,7 @@ void replicate_grad_edges( | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   for (auto& buffer : module->buffers_) { | ||||
|   for (auto& buffer : module->named_buffers(/*recurse=*/false)) { | ||||
|     if (buffer.value().requires_grad()){ | ||||
|       auto grad_fn = std::make_shared<ReduceAdd>((*buffer).device()); | ||||
|       grad_fn->set_next_edges(autograd::collect_next_edges(*buffer)); | ||||
|  | ||||
| @ -32,15 +32,6 @@ std::string join_name(const std::string& name_prefix, const std::string& name) { | ||||
|   full_name += name; | ||||
|   return full_name; | ||||
| } | ||||
|  | ||||
| void extend( | ||||
|     std::vector<Tensor>& vector, | ||||
|     const OrderedDict<std::string, Tensor>& dict) { | ||||
|   vector.reserve(vector.size() + dict.size()); | ||||
|   for (const auto& item : dict) { | ||||
|     vector.push_back(item.value()); | ||||
|   } | ||||
| } | ||||
| } // namespace | ||||
|  | ||||
| Module::Module() | ||||
| @ -141,46 +132,48 @@ void Module::apply( | ||||
| } | ||||
|  | ||||
| std::vector<Tensor> Module::parameters(bool recurse) const { | ||||
|   if (!recurse) { | ||||
|     return parameters_.values(); | ||||
|   } | ||||
|   std::vector<Tensor> result; | ||||
|   apply( | ||||
|       [&result](const Module& module) { extend(result, module.parameters_); }); | ||||
|   return result; | ||||
|   return named_parameters(recurse).values(); | ||||
| } | ||||
|  | ||||
| OrderedDict<std::string, Tensor> Module::named_parameters(bool recurse) const { | ||||
|   if (!recurse) { | ||||
|     return parameters_; | ||||
|   } | ||||
|   OrderedDict<std::string, Tensor> result; | ||||
|   apply([&result](const std::string& name, const Module& module) { | ||||
|     for (const auto& parameter : module.parameters_) { | ||||
|       result.insert(join_name(name, parameter.key()), parameter.value()); | ||||
|   if (!recurse) { | ||||
|     for (const auto& parameter : parameters_) { | ||||
|       if (parameter.value().defined()) { | ||||
|         result.insert(parameter.key(), parameter.value()); | ||||
|       } | ||||
|     } | ||||
|   }); | ||||
|   } else { | ||||
|     apply([&result](const std::string& name, const Module& module) { | ||||
|       for (const auto& parameter : module.named_parameters(/*recurse=*/false)) { | ||||
|         TORCH_INTERNAL_ASSERT(parameter.value().defined()); | ||||
|         result.insert(join_name(name, parameter.key()), parameter.value()); | ||||
|       } | ||||
|     }); | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| std::vector<Tensor> Module::buffers(bool recurse) const { | ||||
|   if (!recurse) { | ||||
|     return buffers_.values(); | ||||
|   } | ||||
|   std::vector<Tensor> result; | ||||
|   apply([&result](const Module& module) { extend(result, module.buffers_); }); | ||||
|   return result; | ||||
|   return named_buffers(recurse).values(); | ||||
| } | ||||
|  | ||||
| OrderedDict<std::string, Tensor> Module::named_buffers(bool recurse) const { | ||||
|   if (!recurse) { | ||||
|     return buffers_; | ||||
|   } | ||||
|   OrderedDict<std::string, Tensor> result; | ||||
|   apply([&result](const std::string& name, const Module& module) { | ||||
|     for (const auto& buffer : module.buffers_) { | ||||
|       result.insert(join_name(name, buffer.key()), buffer.value()); | ||||
|   if (!recurse) { | ||||
|     for (const auto& buffer : buffers_) { | ||||
|       if (buffer.value().defined()) { | ||||
|         result.insert(buffer.key(), buffer.value()); | ||||
|       } | ||||
|     } | ||||
|   }); | ||||
|   } else { | ||||
|     apply([&result](const std::string& name, const Module& module) { | ||||
|       for (const auto& buffer : module.named_buffers(/*recurse=*/false)) { | ||||
|         TORCH_INTERNAL_ASSERT(buffer.value().defined()); | ||||
|         result.insert(join_name(name, buffer.key()), buffer.value()); | ||||
|       } | ||||
|     }); | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| @ -261,7 +254,7 @@ void Module::zero_grad() { | ||||
|   for (auto& child : children_) { | ||||
|     child.value()->zero_grad(); | ||||
|   } | ||||
|   for (auto& parameter : parameters_) { | ||||
|   for (auto& parameter : named_parameters(/*recurse=*/false)) { | ||||
|     auto& grad = parameter->grad(); | ||||
|     if (grad.defined()) { | ||||
|       grad = grad.detach(); | ||||
| @ -271,10 +264,10 @@ void Module::zero_grad() { | ||||
| } | ||||
|  | ||||
| void Module::save(serialize::OutputArchive& archive) const { | ||||
|   for (const auto& parameter : parameters_) { | ||||
|   for (const auto& parameter : named_parameters(/*recurse=*/false)) { | ||||
|     archive.write(parameter.key(), parameter.value()); | ||||
|   } | ||||
|   for (const auto& buffer : buffers_) { | ||||
|   for (const auto& buffer : named_buffers(/*recurse=*/false)) { | ||||
|     archive.write(buffer.key(), buffer.value(), /*is_buffer=*/true); | ||||
|   } | ||||
|   for (const auto& child : children_) { | ||||
| @ -287,10 +280,10 @@ void Module::save(serialize::OutputArchive& archive) const { | ||||
| } | ||||
|  | ||||
| void Module::load(serialize::InputArchive& archive) { | ||||
|   for (auto& parameter : parameters_) { | ||||
|   for (auto& parameter : named_parameters(/*recurse=*/false)) { | ||||
|     archive.read(parameter.key(), parameter.value()); | ||||
|   } | ||||
|   for (auto& buffer : buffers_) { | ||||
|   for (auto& buffer : named_buffers(/*recurse=*/false)) { | ||||
|     archive.read(buffer.key(), buffer.value(), /*is_buffer=*/true); | ||||
|   } | ||||
|   for (const auto& child : children_) { | ||||
|  | ||||
| @ -19,8 +19,20 @@ namespace F = torch::nn::functional; | ||||
| namespace torch { | ||||
| namespace nn { | ||||
| Conv1dImpl::Conv1dImpl( | ||||
|     ConvOptions<1> options_) | ||||
|     : ConvImpl(options_.transposed(false).output_padding(0)) {} | ||||
|     Conv1dOptions options_) | ||||
|     : ConvNdImpl( | ||||
|         detail::ConvNdOptions<1>( | ||||
|           /*in_channels=*/options_.in_channels(), | ||||
|           /*out_channels=*/options_.out_channels(), | ||||
|           /*kernel_size=*/options_.kernel_size()) | ||||
|           .stride(options_.stride()) | ||||
|           .padding(options_.padding()) | ||||
|           .dilation(options_.dilation()) | ||||
|           .transposed(false) | ||||
|           .output_padding(0) | ||||
|           .groups(options_.groups()) | ||||
|           .bias(options_.bias()) | ||||
|           .padding_mode(options_.padding_mode())) {} | ||||
|  | ||||
| Tensor Conv1dImpl::forward(const Tensor& input) { | ||||
|   if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) { | ||||
| @ -44,8 +56,20 @@ Tensor Conv1dImpl::forward(const Tensor& input) { | ||||
| } | ||||
|  | ||||
| Conv2dImpl::Conv2dImpl( | ||||
|     ConvOptions<2> options_) | ||||
|     : ConvImpl(options_.transposed(false).output_padding(0)) {} | ||||
|     Conv2dOptions options_) | ||||
|     : ConvNdImpl( | ||||
|         detail::ConvNdOptions<2>( | ||||
|           /*in_channels=*/options_.in_channels(), | ||||
|           /*out_channels=*/options_.out_channels(), | ||||
|           /*kernel_size=*/options_.kernel_size()) | ||||
|           .stride(options_.stride()) | ||||
|           .padding(options_.padding()) | ||||
|           .dilation(options_.dilation()) | ||||
|           .transposed(false) | ||||
|           .output_padding(0) | ||||
|           .groups(options_.groups()) | ||||
|           .bias(options_.bias()) | ||||
|           .padding_mode(options_.padding_mode())) {} | ||||
|  | ||||
| Tensor Conv2dImpl::forward(const Tensor& input) { | ||||
|   if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) { | ||||
| @ -71,8 +95,20 @@ Tensor Conv2dImpl::forward(const Tensor& input) { | ||||
| } | ||||
|  | ||||
| Conv3dImpl::Conv3dImpl( | ||||
|     ConvOptions<3> options_) | ||||
|     : ConvImpl(options_.transposed(false).output_padding(0)) {} | ||||
|     Conv3dOptions options_) | ||||
|     : ConvNdImpl( | ||||
|         detail::ConvNdOptions<3>( | ||||
|           /*in_channels=*/options_.in_channels(), | ||||
|           /*out_channels=*/options_.out_channels(), | ||||
|           /*kernel_size=*/options_.kernel_size()) | ||||
|           .stride(options_.stride()) | ||||
|           .padding(options_.padding()) | ||||
|           .dilation(options_.dilation()) | ||||
|           .transposed(false) | ||||
|           .output_padding(0) | ||||
|           .groups(options_.groups()) | ||||
|           .bias(options_.bias()) | ||||
|           .padding_mode(options_.padding_mode())) {} | ||||
|  | ||||
| Tensor Conv3dImpl::forward(const Tensor& input) { | ||||
|   if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) { | ||||
| @ -98,14 +134,14 @@ Tensor Conv3dImpl::forward(const Tensor& input) { | ||||
|     options.groups()); | ||||
| } | ||||
|  | ||||
| template class ConvImpl<1, Conv1dImpl>; | ||||
| template class ConvImpl<2, Conv2dImpl>; | ||||
| template class ConvImpl<3, Conv3dImpl>; | ||||
| template class ConvNdImpl<1, Conv1dImpl>; | ||||
| template class ConvNdImpl<2, Conv2dImpl>; | ||||
| template class ConvNdImpl<3, Conv3dImpl>; | ||||
|  | ||||
| // ============================================================================ | ||||
|  | ||||
| template <size_t D, typename Derived> | ||||
| std::vector<int64_t> ConvTransposeImpl<D, Derived>::_output_padding( | ||||
| std::vector<int64_t> ConvTransposeNdImpl<D, Derived>::_output_padding( | ||||
|     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size, | ||||
|     const ExpandingArray<D>& stride, const ExpandingArray<D>& padding, | ||||
|     const ExpandingArray<D>& kernel_size) { | ||||
| @ -151,7 +187,20 @@ std::vector<int64_t> ConvTransposeImpl<D, Derived>::_output_padding( | ||||
| } | ||||
|  | ||||
| ConvTranspose1dImpl::ConvTranspose1dImpl( | ||||
|     ConvTransposeOptions<1> options_) : ConvTransposeImpl(options_.transposed(true)) {} | ||||
|     ConvTranspose1dOptions options_) | ||||
|     : ConvTransposeNdImpl( | ||||
|         detail::ConvNdOptions<1>( | ||||
|           /*in_channels=*/options_.in_channels(), | ||||
|           /*out_channels=*/options_.out_channels(), | ||||
|           /*kernel_size=*/options_.kernel_size()) | ||||
|           .stride(options_.stride()) | ||||
|           .padding(options_.padding()) | ||||
|           .dilation(options_.dilation()) | ||||
|           .transposed(true) | ||||
|           .output_padding(options_.output_padding()) | ||||
|           .groups(options_.groups()) | ||||
|           .bias(options_.bias()) | ||||
|           .padding_mode(options_.padding_mode())) {} | ||||
|  | ||||
| Tensor ConvTranspose1dImpl::forward( | ||||
|     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) { | ||||
| @ -168,7 +217,19 @@ Tensor ConvTranspose1dImpl::forward( | ||||
| } | ||||
|  | ||||
| ConvTranspose2dImpl::ConvTranspose2dImpl( | ||||
|     ConvTransposeOptions<2> options_) : ConvTransposeImpl(options_.transposed(true)) {} | ||||
|     ConvTranspose2dOptions options_) | ||||
|     : ConvTransposeNdImpl(detail::ConvNdOptions<2>( | ||||
|           /*in_channels=*/options_.in_channels(), | ||||
|           /*out_channels=*/options_.out_channels(), | ||||
|           /*kernel_size=*/options_.kernel_size()) | ||||
|           .stride(options_.stride()) | ||||
|           .padding(options_.padding()) | ||||
|           .dilation(options_.dilation()) | ||||
|           .transposed(true) | ||||
|           .output_padding(options_.output_padding()) | ||||
|           .groups(options_.groups()) | ||||
|           .bias(options_.bias()) | ||||
|           .padding_mode(options_.padding_mode())) {} | ||||
|  | ||||
| Tensor ConvTranspose2dImpl::forward( | ||||
|     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) { | ||||
| @ -185,7 +246,19 @@ Tensor ConvTranspose2dImpl::forward( | ||||
| } | ||||
|  | ||||
| ConvTranspose3dImpl::ConvTranspose3dImpl( | ||||
|     ConvTransposeOptions<3> options_) : ConvTransposeImpl(options_.transposed(true)) {} | ||||
|     ConvTranspose3dOptions options_) | ||||
|     : ConvTransposeNdImpl(detail::ConvNdOptions<3>( | ||||
|           /*in_channels=*/options_.in_channels(), | ||||
|           /*out_channels=*/options_.out_channels(), | ||||
|           /*kernel_size=*/options_.kernel_size()) | ||||
|           .stride(options_.stride()) | ||||
|           .padding(options_.padding()) | ||||
|           .dilation(options_.dilation()) | ||||
|           .transposed(true) | ||||
|           .output_padding(options_.output_padding()) | ||||
|           .groups(options_.groups()) | ||||
|           .bias(options_.bias()) | ||||
|           .padding_mode(options_.padding_mode())) {} | ||||
|  | ||||
| Tensor ConvTranspose3dImpl::forward( | ||||
|     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) { | ||||
| @ -201,9 +274,9 @@ Tensor ConvTranspose3dImpl::forward( | ||||
|     output_padding, options.groups(), options.dilation()); | ||||
| } | ||||
|  | ||||
| template class ConvTransposeImpl<1, ConvTranspose1dImpl>; | ||||
| template class ConvTransposeImpl<2, ConvTranspose2dImpl>; | ||||
| template class ConvTransposeImpl<3, ConvTranspose3dImpl>; | ||||
| template class ConvTransposeNdImpl<1, ConvTranspose1dImpl>; | ||||
| template class ConvTransposeNdImpl<2, ConvTranspose2dImpl>; | ||||
| template class ConvTransposeNdImpl<3, ConvTranspose3dImpl>; | ||||
|  | ||||
| } // namespace nn | ||||
| } // namespace torch | ||||
|  | ||||
| @ -36,7 +36,8 @@ PyObject* rpc_init(PyObject* /* unused */) { | ||||
|  | ||||
|   auto rpcBackendOptions = | ||||
|       shared_ptr_class_<RpcBackendOptions>(module, "RpcBackendOptions") | ||||
|           .def_readwrite("rpc_timeout", &RpcBackendOptions::rpcTimeout); | ||||
|           .def_readwrite("rpc_timeout", &RpcBackendOptions::rpcTimeout) | ||||
|           .def_readwrite("init_method", &RpcBackendOptions::initMethod); | ||||
|  | ||||
|   auto workerInfo = | ||||
|       shared_ptr_class_<WorkerInfo>( | ||||
| @ -111,9 +112,9 @@ Otherwise, throws an exception. | ||||
|                 return PyRRef::unpickle(t); | ||||
|               })); | ||||
|  | ||||
|   // future.wait() should not be called after wait_all_workers(), e.g., | ||||
|   // pythonRpcHandler is cleaned up in wait_all_workers(), after | ||||
|   // wait_all_workers(), python objects returned from rpc python call can not be | ||||
|   // future.wait() should not be called after shutdown(), e.g., | ||||
|   // pythonRpcHandler is cleaned up in shutdown(), after | ||||
|   // shutdown(), python objects returned from rpc python call can not be | ||||
|   // resolved. | ||||
|   auto futureMessage = | ||||
|       shared_ptr_class_<FutureMessage>(module, "FutureMessage") | ||||
| @ -154,6 +155,10 @@ Otherwise, throws an exception. | ||||
|           "join", | ||||
|           &ProcessGroupAgent::join, | ||||
|           py::call_guard<py::gil_scoped_release>()) | ||||
|       .def( | ||||
|           "shutdown", | ||||
|           &ProcessGroupAgent::shutdown, | ||||
|           py::call_guard<py::gil_scoped_release>()) | ||||
|       .def( | ||||
|           "sync", | ||||
|           &ProcessGroupAgent::sync, | ||||
| @ -164,8 +169,8 @@ Otherwise, throws an exception. | ||||
|     agent->start(); | ||||
|   }); | ||||
|  | ||||
|   module.def("_destroy_rref_context", []() { | ||||
|     RRefContext::getInstance().destroyInstance(); | ||||
|   module.def("_destroy_rref_context", [](bool ignoreRRefLeak) { | ||||
|     RRefContext::getInstance().destroyInstance(ignoreRRefLeak); | ||||
|   }); | ||||
|  | ||||
|   module.def("_cleanup_python_rpc_handler", []() { | ||||
|  | ||||
| @ -127,7 +127,6 @@ ProcessGroupAgent::ProcessGroupAgent( | ||||
|           WorkerInfo(std::move(workerName), pg->getRank()), | ||||
|           c10::guts::make_unique<RequestCallbackImpl>(), | ||||
|           rpcTimeout), | ||||
|       shutdown_{false}, | ||||
|       pg_(std::move(pg)), | ||||
|       sendCounts_(pg_->getSize()), | ||||
|       recvCounts_(pg_->getSize()), | ||||
| @ -180,30 +179,12 @@ const WorkerInfo& ProcessGroupAgent::getWorkerInfo(worker_id_t id) const { | ||||
| } | ||||
|  | ||||
| void ProcessGroupAgent::join() { | ||||
|   // Every process i sends a SHUTDOWN message to process i + 1. This is | ||||
|   // necessary for now because: | ||||
|   // 1. There is no abort API for ProcessGroup::recvAnysource yet. We have to | ||||
|   //    feed it a message or kill the thread. | ||||
|   // 2. A GLOO process cannot send message to itself. (there is an ongoing | ||||
|   //    effort to fix this problem). | ||||
|   shutdown_.store(true); | ||||
|   sync(); | ||||
|   // This is needed in case no futures were created, otherwise the future | ||||
|   // timeout watchdog would sleep forever. | ||||
|  | ||||
|   futureTimeoutCV_.notify_one(); | ||||
|   std::unique_lock<std::mutex> lock(futureMutex_); | ||||
|   futureCV_.wait( | ||||
|       lock, [this] { return futures_.empty() && futureTimeouts_.empty(); }); | ||||
|   lock.unlock(); | ||||
|   pg_->barrier()->wait(); | ||||
|   int dst = (pg_->getRank() + 1) % pg_->getSize(); | ||||
|   enqueueSend( | ||||
|       SendWork(allWorkerInfo_[dst], Message({}, {}, MessageType::SHUTDOWN))); | ||||
|   threadPool_.waitWorkComplete(); | ||||
|   listenerThread_.join(); | ||||
|   futureTimeoutThread_.join(); | ||||
|   PythonRpcHandler::getInstance().cleanup(); | ||||
| } | ||||
|  | ||||
| bool ProcessGroupAgent::hasPendingMessage() { | ||||
| @ -269,14 +250,38 @@ void ProcessGroupAgent::sync() { | ||||
| } | ||||
|  | ||||
| void ProcessGroupAgent::start() { | ||||
|   { | ||||
|     std::lock_guard<std::mutex> futureLock{futureMutex_}; | ||||
|     rpcRunning_.store(true); | ||||
|   } | ||||
|   listenerThread_ = std::thread(&ProcessGroupAgent::listenLoop, this); | ||||
|   futureTimeoutThread_ = | ||||
|       std::thread(&ProcessGroupAgent::pollTimedOutRPCs, this); | ||||
| } | ||||
|  | ||||
| void ProcessGroupAgent::shutdown() { | ||||
|   LOG(INFO) << "Shutting down ProcessGroupAgent."; | ||||
|   std::unique_lock<std::mutex> lock{futureMutex_}; | ||||
|   if (!rpcRunning_.exchange(false)) { | ||||
|     return; | ||||
|   } | ||||
|   lock.unlock(); | ||||
|   futureTimeoutCV_.notify_one(); | ||||
|   futureTimeoutThread_.join(); | ||||
|   { | ||||
|     std::unique_lock<std::mutex> lock(recvWorkMutex_); | ||||
|     if (recvWork_) { | ||||
|       recvWork_->abort(); | ||||
|     } | ||||
|   } | ||||
|   threadPool_.waitWorkComplete(); | ||||
|   listenerThread_.join(); | ||||
| } | ||||
|  | ||||
| std::shared_ptr<FutureMessage> ProcessGroupAgent::send( | ||||
|     const WorkerInfo& to, | ||||
|     Message&& message) { | ||||
|   TORCH_CHECK(rpcRunning_.load(), "ProcessGroupAgent hasn't started.") | ||||
|   TORCH_CHECK( | ||||
|       to.id_ < (worker_id_t)pg_->getSize(), | ||||
|       "Destination rank is out of bound, got ", | ||||
| @ -456,10 +461,19 @@ void ProcessGroupAgent::enqueueRecv(RecvWork work) { | ||||
| } | ||||
|  | ||||
| void ProcessGroupAgent::listenLoop() { | ||||
|   while (true) { | ||||
|   while (rpcRunning_.load()) { | ||||
|     // rank, tensor size, message type | ||||
|     std::vector<torch::Tensor> preamble = {torch::empty({3}, {torch::kInt64})}; | ||||
|     pg_->recvAnysource(preamble, pg_->getRank())->wait(); | ||||
|     auto work = pg_->recvAnysource(preamble, pg_->getRank()); | ||||
|     { | ||||
|       std::lock_guard<std::mutex> guard(recvWorkMutex_); | ||||
|       recvWork_ = work; | ||||
|     } | ||||
|  | ||||
|     if (!rpcRunning_.load() || !work->wait() /* aborted */) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     int64_t* preamble_items = preamble.front().storage().data<int64_t>(); | ||||
|  | ||||
|     auto srcRank = preamble_items[0]; | ||||
| @ -483,9 +497,12 @@ void ProcessGroupAgent::listenLoop() { | ||||
| } | ||||
|  | ||||
| void ProcessGroupAgent::pollTimedOutRPCs() { | ||||
|   while (!shutdown_.load()) { | ||||
|     std::chrono::milliseconds sleepTime; | ||||
|   while (true) { | ||||
|     std::unique_lock<std::mutex> lock{futureMutex_}; | ||||
|     if (!rpcRunning_.load()) { | ||||
|       return; | ||||
|     } | ||||
|     std::chrono::milliseconds sleepTime; | ||||
|     // Estimate amount of time the first future will time out in, and sleep | ||||
|     // for that long. | ||||
|     // if there are no futures or the first future's RPC timeout is set to 0 | ||||
| @ -505,7 +522,7 @@ void ProcessGroupAgent::pollTimedOutRPCs() { | ||||
|       futureTimeoutCV_.wait_for(lock, sleepTime); | ||||
|     } | ||||
|  | ||||
|     if (shutdown_.load()) { | ||||
|     if (!rpcRunning_.load()) { | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|  | ||||
| @ -14,7 +14,7 @@ namespace distributed { | ||||
| namespace rpc { | ||||
|  | ||||
| struct ProcessGroupRpcBackendOptions : public RpcBackendOptions { | ||||
|   ProcessGroupRpcBackendOptions() noexcept = default; | ||||
|   ProcessGroupRpcBackendOptions() = default; | ||||
|   int numSendRecvThreads; | ||||
| }; | ||||
|  | ||||
| @ -57,6 +57,8 @@ class ProcessGroupAgent : public RpcAgent { | ||||
|  | ||||
|   void start() override; | ||||
|  | ||||
|   void shutdown() override; | ||||
|  | ||||
|  protected: | ||||
|   // This method wraps the destination information and the message into a | ||||
|   // SendWork object, and put the SendWork into a queue. Another thread will | ||||
| @ -143,10 +145,6 @@ class ProcessGroupAgent : public RpcAgent { | ||||
|     return ++nextId_; | ||||
|   } | ||||
|  | ||||
|   // atomic bool indicating if join() has been called and background threads | ||||
|   // should shutdown. | ||||
|   std::atomic_bool shutdown_; | ||||
|  | ||||
|   std::shared_ptr<c10d::ProcessGroup> pg_; | ||||
|   // worker name -> rank | ||||
|   std::unordered_map<std::string, int> nameMap_; | ||||
| @ -159,12 +157,23 @@ class ProcessGroupAgent : public RpcAgent { | ||||
|   MessageCounter recvCounts_; | ||||
|  | ||||
|   std::atomic<int64_t> nextId_; | ||||
|   // atomic bool indicating if this agent is running. It is set in | ||||
|   // ProcessGroupAgent::start and unset in ProcessGroupAgent::shutdown and | ||||
|   // ProcessGroupAgent::join. It controls whether several background threads | ||||
|   // should be running. | ||||
|   // We lock access to this in shutdown() and pollTimedOutRPCs() to prevent race | ||||
|   // conditions when notifying condition variables. | ||||
|   std::atomic<bool> rpcRunning_{false}; | ||||
|   // one mutex per ProcessGroup rank, as ProcessGroup::send is not thread-safe | ||||
|   // when using the same tag. | ||||
|   std::vector<std::mutex> sendMutexes_; | ||||
|   std::thread listenerThread_; | ||||
|   // A thread to poll existing futures and check for timed out ones. | ||||
|   std::thread futureTimeoutThread_; | ||||
|   // Lock and shared ptr to currently pending work, set in listenloop() and | ||||
|   // interruptible in shutdown(). | ||||
|   std::mutex recvWorkMutex_; | ||||
|   std::shared_ptr<c10d::ProcessGroup::Work> recvWork_; | ||||
|   // A threadPool that processing both SendWork and RecvWork. There are two | ||||
|   // motivations for adding a ThreadPool: | ||||
|   // (1) RPC serialization/deserialization and processing can be expensive, | ||||
|  | ||||
| @ -13,8 +13,9 @@ namespace distributed { | ||||
| namespace rpc { | ||||
|  | ||||
| struct RpcBackendOptions { | ||||
|   RpcBackendOptions() noexcept = default; | ||||
|   RpcBackendOptions() = default; | ||||
|   std::chrono::milliseconds rpcTimeout; | ||||
|   std::string initMethod; | ||||
| }; | ||||
|  | ||||
| // A globally unique ID to identify an RpcAgent | ||||
| @ -124,7 +125,11 @@ class TORCH_API RpcAgent { | ||||
|   virtual void sync() = 0; | ||||
|  | ||||
|   // start accepting requests | ||||
|   virtual void start() {} | ||||
|   virtual void start() = 0; | ||||
|  | ||||
|   // Stop accepting requests and shutdown the RPC framework as soon as possible | ||||
|   // by terminating all RPC threads. | ||||
|   virtual void shutdown() = 0; | ||||
|  | ||||
|   // Set the default rpc agent. | ||||
|   static void setDefaultRpcAgent(std::shared_ptr<RpcAgent> defaultRpcAgent); | ||||
|  | ||||
| @ -136,15 +136,16 @@ UserRRef<T>::UserRRef( | ||||
|  | ||||
| template <typename T> | ||||
| UserRRef<T>::~UserRRef() { | ||||
|   // TODO: queue this in RRefContext instead of doing it here. | ||||
|   auto& ctx = RRefContext::getInstance(); | ||||
|   if (ctx.getWorkerId() != ownerId_) { | ||||
|     auto fm = ctx.agent()->send( | ||||
|         ctx.agent()->getWorkerInfo(ownerId_), | ||||
|         RRefUserDelete(rrefId_, forkId_).toMessage()); | ||||
|  | ||||
|     fm->addCallback( | ||||
|         [](const Message& message) { RRefContext::handleException(message); }); | ||||
|   try { | ||||
|     RRefContext::getInstance().delUser(ownerId_, rrefId_, forkId_); | ||||
|   } catch (const std::exception& ex) { | ||||
|     LOG(ERROR) << "Error occurred when deleting UserRRef instance, " | ||||
|                << "RRefId = " << rrefId_ << ", ForkId = " << forkId_ << " : " | ||||
|                << ex.what(); | ||||
|   } catch (...) { | ||||
|     LOG(ERROR) << "Error occurred when deleting UserRRef instance, " | ||||
|                << "RRefId = " << rrefId_ << ", ForkId = " << forkId_ << " : " | ||||
|                << "unknown error"; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -13,8 +13,13 @@ RRefContext& RRefContext::getInstance() { | ||||
|   return *context; | ||||
| } | ||||
|  | ||||
| void RRefContext::destroyInstance() { | ||||
|   RRefContext::getInstance().checkRRefLeaks(); | ||||
| void RRefContext::destroyInstance(bool ignoreRRefLeak) { | ||||
|   auto& ctx = RRefContext::getInstance(); | ||||
|   { | ||||
|     std::lock_guard<std::mutex> lock(ctx.destroyedMutex_); | ||||
|     ctx.destroyed_ = true; | ||||
|   } | ||||
|   ctx.checkRRefLeaks(ignoreRRefLeak); | ||||
| } | ||||
|  | ||||
| void RRefContext::handleException(const Message& message) { | ||||
| @ -27,7 +32,7 @@ void RRefContext::handleException(const Message& message) { | ||||
| } | ||||
|  | ||||
| RRefContext::RRefContext(std::shared_ptr<RpcAgent> agent) | ||||
|     : agent_(std::move(agent)) {} | ||||
|     : agent_(std::move(agent)), destroyed_(false) {} | ||||
|  | ||||
| RRefContext::~RRefContext() { | ||||
|   if (!owners_.empty()) { | ||||
| @ -36,7 +41,7 @@ RRefContext::~RRefContext() { | ||||
|   } | ||||
| } | ||||
|  | ||||
| void RRefContext::checkRRefLeaks() { | ||||
| void RRefContext::checkRRefLeaks(bool ignoreRRefLeak) { | ||||
|   if (!forks_.empty()) { | ||||
|     std::stringstream ss; | ||||
|     for (auto& entry : forks_) { | ||||
| @ -46,7 +51,21 @@ void RRefContext::checkRRefLeaks() { | ||||
|            << std::endl; | ||||
|       } | ||||
|     } | ||||
|     AT_ERROR(ss.str()); | ||||
|  | ||||
|     if (ignoreRRefLeak) { | ||||
|       LOG(WARNING) | ||||
|           << "Detected RRef Leaks during shutdown. This usually " | ||||
|           << "occurs when the application code still holds references to RRef " | ||||
|           << "instances when calling shutdown(). If the program has " | ||||
|           << "completed correctly and the process is exiting, it is OK to " | ||||
|           << "ignore these leaks. However, if you program will keep running " | ||||
|           << "after this, these leaks could result in memory leaks on RRef " | ||||
|           << "owners. Please make sure all RRefs are out of scope and Python " | ||||
|           << "GC has deleted them before calling shutdown(): \n" | ||||
|           << ss.str(); | ||||
|     } else { | ||||
|       AT_ERROR(ss.str()); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| @ -96,6 +115,21 @@ template std::shared_ptr<UserRRef<py::object>> RRefContext::createUserRRef< | ||||
|     const RRefId& rrefId, | ||||
|     const ForkId& forkId); | ||||
|  | ||||
| void RRefContext::delUser( | ||||
|     const worker_id_t owner, | ||||
|     const RRefId& rrefId, | ||||
|     const ForkId& forkId) { | ||||
|   std::lock_guard<std::mutex> lock(destroyedMutex_); | ||||
|   if (!destroyed_) { | ||||
|     auto fm = agent_->send( | ||||
|         agent_->getWorkerInfo(owner), | ||||
|         RRefUserDelete(rrefId, forkId).toMessage()); | ||||
|  | ||||
|     fm->addCallback( | ||||
|         [](const Message& message) { RRefContext::handleException(message); }); | ||||
|   } | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| std::shared_ptr<RRef> RRefContext::getOrCreateRRef(const RRefForkData& rfd) { | ||||
|   auto& ownerId = rfd.ownerId_; | ||||
|  | ||||
| @ -16,7 +16,7 @@ namespace rpc { | ||||
| class RRefContext { | ||||
|  public: | ||||
|   static RRefContext& getInstance(); | ||||
|   static void destroyInstance(); | ||||
|   static void destroyInstance(bool ignoreRRefLeak = true); | ||||
|  | ||||
|   static void handleException(const Message& message); | ||||
|  | ||||
| @ -111,6 +111,11 @@ class RRefContext { | ||||
|   void addPendingUser(const ForkId& forkId, const std::shared_ptr<RRef>& rref); | ||||
|   void delPendingUser(const ForkId& forkId); | ||||
|  | ||||
|   void delUser( | ||||
|       const worker_id_t owner, | ||||
|       const RRefId& rrefId, | ||||
|       const ForkId& forkId); | ||||
|  | ||||
|  private: | ||||
|   RRefContext(std::shared_ptr<RpcAgent>); | ||||
|  | ||||
| @ -123,7 +128,7 @@ class RRefContext { | ||||
|   void finishForkRequest(const ForkId& forkId, worker_id_t parent); | ||||
|  | ||||
|   // If there is any leak on any RRef, this method will throw an error. | ||||
|   void checkRRefLeaks(); | ||||
|   void checkRRefLeaks(bool ignoreRRefLeak); | ||||
|  | ||||
|   static std::atomic<local_id_t> nextLocalId_; | ||||
|  | ||||
| @ -157,6 +162,9 @@ class RRefContext { | ||||
|   //     owner learns about the forked child. | ||||
|   std::unordered_map<ForkId, std::shared_ptr<RRef>, ForkId::Hash> | ||||
|       pendingChildren_; | ||||
|  | ||||
|   std::mutex destroyedMutex_; | ||||
|   bool destroyed_; | ||||
| }; | ||||
|  | ||||
| } // namespace rpc | ||||
|  | ||||
| @ -751,5 +751,33 @@ std::tuple<std::string, RawDataExportMap> export_onnx( | ||||
|       graph_encoder.get_raw_data_export_map()); | ||||
| } | ||||
|  | ||||
| namespace { | ||||
| void export_opnames(const script::Module& m, std::set<std::string>& opnames) { | ||||
|   for (const auto& method : m.get_methods()) { | ||||
|     const auto& func = method.function(); | ||||
|     for (const auto& node : func.graph()->nodes()) { | ||||
|       auto op = findOperatorFor(node); | ||||
|       if (op) { | ||||
|         auto opname = node->schema().operator_name(); | ||||
|         std::string namestr = opname.name; | ||||
|         if (!opname.overload_name.empty()) { | ||||
|           namestr += "." + opname.overload_name; | ||||
|         } | ||||
|         opnames.emplace(namestr); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   for (const auto& sub_m : m.children()) { | ||||
|     export_opnames(sub_m, opnames); | ||||
|   } | ||||
| } | ||||
| } // namespace | ||||
|  | ||||
| std::vector<std::string> export_opnames(const script::Module& m) { | ||||
|   std::set<std::string> names; | ||||
|   export_opnames(m, names); | ||||
|   return std::vector<std::string>(names.begin(), names.end()); | ||||
| } | ||||
|  | ||||
| } // namespace jit | ||||
| } // namespace torch | ||||
|  | ||||
| @ -65,5 +65,8 @@ using ExportModuleExtraFilesHook = | ||||
|     std::function<script::ExtraFilesMap(const script::Module&)>; | ||||
| TORCH_API void SetExportModuleExtraFilesHook(ExportModuleExtraFilesHook hook); | ||||
|  | ||||
| // Returns a list of names of all operators in the module and its submodules. | ||||
| TORCH_API std::vector<std::string> export_opnames(const script::Module& m); | ||||
|  | ||||
| } // namespace jit | ||||
| } // namespace torch | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	