mirror of https://github.com/pytorch/pytorch.git (synced 2025-11-04 08:00:58 +08:00)

Compare commits: export-D73...v1.4.0 (58 commits)
| SHA1 |
|---|
| 7f73f1d591 |
| ac15471de4 |
| 49364eb426 |
| bcf2d65446 |
| f7a33f1eef |
| bd584d52df |
| c697af4667 |
| 0f3f4ec64c |
| 509df600bb |
| 187101a88e |
| e011d4a16e |
| 8ada95e950 |
| 21c2481dfe |
| 398e8ba182 |
| 074b30cdcb |
| 319bd5d431 |
| 5a20bbd377 |
| fa59a9e190 |
| 143868c3df |
| 964929fcc2 |
| cd20ecb472 |
| 19d4fd4910 |
| a7d187baa4 |
| 0541546ac5 |
| 369ab73efd |
| 9f558e1ee6 |
| f0ddfff200 |
| 2de184b5a9 |
| e0eeddfc78 |
| 7727b57d08 |
| 9e7dc37f90 |
| 227017059f |
| aeeccc1486 |
| 0b91246cbd |
| 0856d6f53c |
| 336e0d2874 |
| 3b36f2068d |
| 6207945564 |
| aecae514ab |
| 27a2ecb0a5 |
| e36fd7b0ba |
| 799cb646a6 |
| f60c63155a |
| 954d9ea466 |
| 71185fb2a0 |
| a06f26560c |
| e4cec279c6 |
| b8b50aa909 |
| db686de13f |
| 288e463693 |
| 73783d1048 |
| 8891d4eeb1 |
| 2085a6f329 |
| 3eda9e7da2 |
| fb8aa0e98c |
| c79b79dadd |
| 21acca4528 |
| f710757557 |
```diff
@@ -36,6 +36,8 @@ class Conf(object):
         # The cpu nightlies are built on the pytorch/manylinux-cuda100 docker image
         alt_docker_suffix = self.cuda_version or "100"
         docker_distro_suffix = "" if self.pydistro == "conda" else alt_docker_suffix
+        if self.cuda_version == "101":
+            return "soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916"
         return miniutils.quote("pytorch/" + docker_distro_prefix + "-cuda" + docker_distro_suffix)

     def get_name_prefix(self):
```
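The hunk above pins the CUDA 10.1 build image by immutable digest instead of a floating tag, so release rebuilds cannot silently pick up a newer toolchain. A minimal Python sketch of that selection logic; the `get_docker_image` helper and its fallback branches are illustrative, not the exact `Conf` implementation:

```python
# Illustrative sketch only: `cuda_version` and `pydistro` mirror Conf
# attributes from the hunk above; the non-pinned branch is simplified.
PINNED_CUDA101 = (
    "soumith/manylinux-cuda101@sha256:"
    "5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916"
)

def get_docker_image(cuda_version, pydistro="manywheel"):
    if cuda_version == "101":
        # Digest-pinned: docker pull always resolves to the same bytes.
        return PINNED_CUDA101
    # The cpu nightlies are built on the pytorch/manylinux-cuda100 image.
    alt_docker_suffix = cuda_version or "100"
    docker_distro_suffix = "" if pydistro == "conda" else alt_docker_suffix
    return "pytorch/manylinux-cuda" + docker_distro_suffix

assert get_docker_image("101") == PINNED_CUDA101
assert get_docker_image(None) == "pytorch/manylinux-cuda100"
```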
```diff
@@ -24,11 +24,11 @@ CONFIG_TREE_DATA = [
             ("5", [
                 XImportant("3.6"),  # This is actually the ASAN build
             ]),
-            ("7", [
-                ("3.6", [
-                    ("xla", [XImportant(True)]),
-                ]),
-            ]),
+            # ("7", [
+            #     ("3.6", [
+            #         ("xla", [XImportant(True)]),
+            #     ]),
+            # ]),
         ]),
         ("cuda", [
             ("9", [
```
```diff
@@ -210,6 +210,7 @@ def instantiate_configs():
             android_abi = fc.find_prop("android_abi")
             parms_list_ignored_for_docker_image.append(android_abi)
             restrict_phases = ["build"]
+            fc.props["is_important"] = True

         elif compiler_name:
             gcc_version = compiler_name + (fc.find_prop("compiler_version") or "")
```
```diff
@@ -307,27 +307,28 @@ jobs:
           time docker pull ${DOCKER_IMAGE} >/dev/null
           export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})

-          # TODO We may want to move the rebase logic to a separate step after checkout
-          # Rebase to master only if in xenial_py3_6_gcc5_4 case
-          if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
-            echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
-            set -x
-            git config --global user.email "circleci.ossci@gmail.com"
-            git config --global user.name "CircleCI"
-            git config remote.origin.url https://github.com/pytorch/pytorch.git
-            git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
-            git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
-            export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
-            echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
-            export GIT_COMMIT=${CIRCLE_SHA1}
-            echo "GIT_COMMIT: " ${GIT_COMMIT}
-            git checkout -f ${GIT_COMMIT}
-            git reset --hard ${GIT_COMMIT}
-            git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
-            set +x
-          else
-            echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
-          fi
+          # NB: Temporarily disable the rebase logic in v1.4.0, don't merge this change into master
+          # # TODO We may want to move the rebase logic to a separate step after checkout
+          # # Rebase to master only if in xenial_py3_6_gcc5_4 case
+          # if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
+          #   echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
+          #   set -x
+          #   git config --global user.email "circleci.ossci@gmail.com"
+          #   git config --global user.name "CircleCI"
+          #   git config remote.origin.url https://github.com/pytorch/pytorch.git
+          #   git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
+          #   git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
+          #   export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
+          #   echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
+          #   export GIT_COMMIT=${CIRCLE_SHA1}
+          #   echo "GIT_COMMIT: " ${GIT_COMMIT}
+          #   git checkout -f ${GIT_COMMIT}
+          #   git reset --hard ${GIT_COMMIT}
+          #   git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
+          #   set +x
+          # else
+          #   echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+          # fi

           git submodule sync && git submodule update -q --init --recursive

```
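The guard being commented out above only ever fired for one build flavor. As a sketch, the branch/environment gate reduces to a small predicate; the `should_merge_master` name here is hypothetical:

```python
def should_merge_master(circle_branch: str, build_environment: str) -> bool:
    # Mirror of the shell condition in the hunk above: merge master into
    # the PR branch only for non-master builds of the gcc5
    # (xenial_py3_6_gcc5_4) environment.
    return circle_branch != "master" and "gcc5" in build_environment

assert should_merge_master("my-feature", "pytorch-linux-xenial-py3.6-gcc5.4-build")
assert not should_merge_master("master", "pytorch-linux-xenial-py3.6-gcc5.4-build")
assert not should_merge_master("my-feature", "pytorch-linux-xenial-cuda9-cudnn7-py3-build")
```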
```diff
@@ -1709,20 +1710,6 @@ workflows:
           build_environment: "pytorch-linux-xenial-py3-clang5-asan-test"
           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan:405"
           resource_class: large
-      - pytorch_linux_build:
-          name: pytorch_xla_linux_xenial_py3_6_clang7_build
-          requires:
-            - setup
-          build_environment: "pytorch-xla-linux-xenial-py3.6-clang7-build"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-clang7:405"
-      - pytorch_linux_test:
-          name: pytorch_xla_linux_xenial_py3_6_clang7_test
-          requires:
-            - setup
-            - pytorch_xla_linux_xenial_py3_6_clang7_build
-          build_environment: "pytorch-xla-linux-xenial-py3.6-clang7-test"
-          docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-clang7:405"
-          resource_class: large
       - pytorch_linux_build:
           name: pytorch_linux_xenial_cuda9_cudnn7_py3_build
           requires:
```
```diff
@@ -1874,33 +1861,18 @@ workflows:
           name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_64_build
           requires:
             - setup
-          filters:
-            branches:
-              only:
-                - master
-                - /ci-all\/.*/
           build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64-build"
           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405"
       - pytorch_linux_build:
           name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v7a_build
           requires:
             - setup
-          filters:
-            branches:
-              only:
-                - master
-                - /ci-all\/.*/
           build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a-build"
           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405"
       - pytorch_linux_build:
           name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v8a_build
           requires:
             - setup
-          filters:
-            branches:
-              only:
-                - master
-                - /ci-all\/.*/
           build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a-build"
           docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405"
       # Warning: indentation here matters!
```
```diff
@@ -2292,7 +2264,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2305,7 +2277,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2318,7 +2290,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2331,7 +2303,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2344,7 +2316,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2505,7 +2477,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2518,7 +2490,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2531,7 +2503,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2544,7 +2516,7 @@ workflows:
           filters:
             branches:
               only: postnightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2718,7 +2690,7 @@ workflows:
             branches:
               only: postnightly
           libtorch_variant: "shared-with-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2732,7 +2704,7 @@ workflows:
             branches:
               only: postnightly
           libtorch_variant: "shared-without-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2746,7 +2718,7 @@ workflows:
             branches:
               only: postnightly
           libtorch_variant: "static-with-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -2760,7 +2732,7 @@ workflows:
             branches:
               only: postnightly
           libtorch_variant: "static-without-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - smoke_linux_test:
@@ -3212,7 +3184,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_manywheel_2_7mu_cu101_devtoolset7_nightly_build
           build_environment: "manywheel 2.7mu cu101 devtoolset7"
@@ -3221,7 +3193,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_manywheel_3_5m_cu101_devtoolset7_nightly_build
           build_environment: "manywheel 3.5m cu101 devtoolset7"
@@ -3230,7 +3202,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_manywheel_3_6m_cu101_devtoolset7_nightly_build
           build_environment: "manywheel 3.6m cu101 devtoolset7"
@@ -3239,7 +3211,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_manywheel_3_7m_cu101_devtoolset7_nightly_build
           build_environment: "manywheel 3.7m cu101 devtoolset7"
@@ -3248,7 +3220,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_conda_2_7_cpu_devtoolset7_nightly_build
           build_environment: "conda 2.7 cpu devtoolset7"
@@ -3365,7 +3337,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_conda_3_5_cu101_devtoolset7_nightly_build
           build_environment: "conda 3.5 cu101 devtoolset7"
@@ -3374,7 +3346,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_conda_3_6_cu101_devtoolset7_nightly_build
           build_environment: "conda 3.6 cu101 devtoolset7"
@@ -3383,7 +3355,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_conda_3_7_cu101_devtoolset7_nightly_build
           build_environment: "conda 3.7 cu101 devtoolset7"
@@ -3392,7 +3364,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_libtorch_2_7m_cpu_devtoolset7_nightly_shared-with-deps_build
           build_environment: "libtorch 2.7m cpu devtoolset7"
@@ -3522,7 +3494,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "shared-with-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_shared-without-deps_build
           build_environment: "libtorch 2.7m cu101 devtoolset7"
@@ -3532,7 +3504,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "shared-without-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_static-with-deps_build
           build_environment: "libtorch 2.7m cu101 devtoolset7"
@@ -3542,7 +3514,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "static-with-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_static-without-deps_build
           build_environment: "libtorch 2.7m cu101 devtoolset7"
@@ -3552,7 +3524,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "static-without-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
       - binary_linux_build:
           name: binary_linux_libtorch_2_7m_cpu_gcc5_4_cxx11-abi_nightly_shared-with-deps_build
           build_environment: "libtorch 2.7m cpu gcc5.4_cxx11-abi"
@@ -4056,7 +4028,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4068,7 +4040,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4080,7 +4052,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4092,7 +4064,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4104,7 +4076,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4252,7 +4224,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4264,7 +4236,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4276,7 +4248,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4288,7 +4260,7 @@ workflows:
           filters:
             branches:
               only: nightly
-          docker_image: "pytorch/conda-cuda"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4449,7 +4421,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "shared-with-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4462,7 +4434,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "shared-without-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4475,7 +4447,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "static-with-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
@@ -4488,7 +4460,7 @@ workflows:
             branches:
               only: nightly
           libtorch_variant: "static-without-deps"
-          docker_image: "pytorch/manylinux-cuda101"
+          docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
           use_cuda_docker_runtime: "1"
           resource_class: gpu.medium
       - binary_linux_test:
```
```diff
@@ -11,6 +11,8 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
   source activate testenv >/dev/null
 elif [[ "$DESIRED_PYTHON" == 2.7mu ]]; then
   export PATH="/opt/python/cp27-cp27mu/bin:\$PATH"
+elif [[ "$DESIRED_PYTHON" == 3.8m ]]; then
+  export PATH="/opt/python/cp38-cp38/bin:\$PATH"
 elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
   python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"
   export PATH="/opt/python/cp\$python_nodot-cp\${python_nodot}m/bin:\$PATH"
```
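The new 3.8m special case is needed because CPython 3.8 dropped the `m` ABI flag from its tag, so the generic `cp$X-cp${X}m` rule below it would produce a nonexistent `cp38-cp38m` directory. A quick sketch of the mapping, with a hypothetical `manylinux_bin_dir` helper standing in for the shell dispatch:

```python
def manylinux_bin_dir(desired_python: str) -> str:
    # Mirrors the shell branches above, assuming the standard
    # /opt/python layout of manylinux images.
    if desired_python == "2.7mu":
        return "/opt/python/cp27-cp27mu/bin"
    if desired_python == "3.8m":
        # CPython 3.8 dropped the 'm' ABI suffix: cp38-cp38, not cp38-cp38m.
        return "/opt/python/cp38-cp38/bin"
    # Same transformation as `tr -d m.u` in the script.
    nodot = desired_python.translate(str.maketrans("", "", "m.u"))
    return f"/opt/python/cp{nodot}-cp{nodot}m/bin"

assert manylinux_bin_dir("3.6m") == "/opt/python/cp36-cp36m/bin"
assert manylinux_bin_dir("3.8m") == "/opt/python/cp38-cp38/bin"
```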
```diff
@@ -53,8 +53,10 @@ default_set = set([
     'pytorch-macos-10.13-cuda9.2-cudnn7-py3',
     # PyTorch Android
     'pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32-build',
     'pytorch-linux-xenial-py3-clang5-android-ndk-r19',
+    # PyTorch Android gradle
+    'pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32',

     # Pytorch iOS builds
     'pytorch-ios-11.2.1-x86_64_build',
     'pytorch-ios-11.2.1-arm64_build',
```
```diff
@@ -19,27 +19,28 @@ jobs:
           time docker pull ${DOCKER_IMAGE} >/dev/null
           export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})

-          # TODO We may want to move the rebase logic to a separate step after checkout
-          # Rebase to master only if in xenial_py3_6_gcc5_4 case
-          if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
-            echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
-            set -x
-            git config --global user.email "circleci.ossci@gmail.com"
-            git config --global user.name "CircleCI"
-            git config remote.origin.url https://github.com/pytorch/pytorch.git
-            git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
-            git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
-            export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
-            echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
-            export GIT_COMMIT=${CIRCLE_SHA1}
-            echo "GIT_COMMIT: " ${GIT_COMMIT}
-            git checkout -f ${GIT_COMMIT}
-            git reset --hard ${GIT_COMMIT}
-            git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
-            set +x
-          else
-            echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
-          fi
+          # NB: Temporarily disable the rebase logic in v1.4.0, don't merge this change into master
+          # # TODO We may want to move the rebase logic to a separate step after checkout
+          # # Rebase to master only if in xenial_py3_6_gcc5_4 case
+          # if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
+          #   echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
+          #   set -x
+          #   git config --global user.email "circleci.ossci@gmail.com"
+          #   git config --global user.name "CircleCI"
+          #   git config remote.origin.url https://github.com/pytorch/pytorch.git
+          #   git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
+          #   git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
+          #   export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
+          #   echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
+          #   export GIT_COMMIT=${CIRCLE_SHA1}
+          #   echo "GIT_COMMIT: " ${GIT_COMMIT}
+          #   git checkout -f ${GIT_COMMIT}
+          #   git reset --hard ${GIT_COMMIT}
+          #   git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
+          #   set +x
+          # else
+          #   echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+          # fi

           git submodule sync && git submodule update -q --init --recursive

```
.github/workflows/lint.yml (8 changes, vendored)
```diff
@@ -16,7 +16,7 @@ jobs:
           python-version: 3.x
           architecture: x64
       - name: Checkout PyTorch
-        uses: actions/checkout@master
+        uses: actions/checkout@v1
       - name: Ensure consistent CircleCI YAML config
         run: |
           pip install -r requirements.txt
@@ -51,7 +51,7 @@ jobs:
           python-version: 3.x
           architecture: x64
       - name: Fetch PyTorch
-        uses: actions/checkout@master
+        uses: actions/checkout@v1
       - name: Checkout PR tip
         run: |
           set -eux
@@ -87,7 +87,7 @@ jobs:
           python-version: 2.x
           architecture: x64
       - name: Fetch PyTorch
-        uses: actions/checkout@master
+        uses: actions/checkout@v1
       - name: Checkout PR tip
         run: |
           set -eux
@@ -126,7 +126,7 @@ jobs:
           python-version: 3.x
           architecture: x64
       - name: Checkout PyTorch
-        uses: actions/checkout@master
+        uses: actions/checkout@v1
       - name: Checkout PR tip
         run: |
           set -eux
```
```diff
@@ -64,7 +64,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
   exit 0
 fi

-if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
+# if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
   # Hotfix, use hypothesis 3.44.6 on Ubuntu 14.04
   # See comments on
   # https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830
@@ -74,9 +74,9 @@ if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
   sudo pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
   sudo pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
   sudo pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
-else
-  pip install --user --no-cache-dir hypothesis==3.59.0
-fi
+# else
+#   pip install --user --no-cache-dir hypothesis==3.59.0
+# fi

 # Collect additional tests to run (outside caffe2/python)
 EXTRA_TESTS=()
```
```diff
@@ -133,7 +133,7 @@ pip install --user pytest-sugar
 # torchvision tests #
 #####################
 if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
-  pip install -q --user git+https://github.com/pytorch/vision.git
+  pip install -q --user git+https://github.com/pytorch/vision.git@v0.5.0
   pip install -q --user ninja
   # JIT C++ extensions require ninja, so put it into PATH.
   export PATH="/var/lib/jenkins/.local/bin:$PATH"
@@ -141,7 +141,7 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
     # default pip version is too old(9.0.2), unable to support tag `manylinux2010`.
     # Fix the pip error: Couldn't find a version that satisfies the requirement
     sudo pip install --upgrade pip
-    pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.0.0.dev1104
+    pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.1.0.dev1228
   fi
   "$ROOT_DIR/scripts/onnx/test.sh"
 fi
```
```diff
@@ -49,7 +49,7 @@ if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
   export PATH="/var/lib/jenkins/.local/bin:$PATH"

   # TODO: move this to Docker
-  pip_install --user hypothesis
+  pip_install --user "hypothesis==4.53.2"

   # TODO: move this to Docker
   PYTHON_VERSION=$(python -c 'import platform; print(platform.python_version())'|cut -c1)
@@ -214,7 +214,7 @@ test_backward_compatibility() {
   pushd test/backward_compatibility
   python dump_all_function_schemas.py --filename new_schemas.txt
   pip_uninstall torch
-  pip_install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+  pip_install torch==1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
   python check_backward_compatibility.py --new-schemas new_schemas.txt
   popd
   set +x
```
```diff
@@ -22,7 +22,7 @@ if NOT "%BUILD_ENVIRONMENT%"=="" (
     :: Numba is pinned to 0.44.0 to avoid https://github.com/numba/numba/issues/4352
     call conda install -y -q python=3.6.7 numpy mkl cffi pyyaml boto3 protobuf numba==0.44.0
 )
-pip install -q ninja future hypothesis "librosa>=0.6.2" psutil pillow
+pip install -q ninja future "hypothesis==4.53.2" "librosa>=0.6.2" psutil pillow
 :: No need to install faulthandler since we only test Python >= 3.6 on Windows
 :: faulthandler is builtin since Python 3.3

```
```diff
@@ -413,7 +413,7 @@ public abstract class Tensor {
    */
   public long[] getDataAsLongArray() {
     throw new IllegalStateException(
-        "Tensor of type " + getClass().getSimpleName() + " cannot return data as float array.");
+        "Tensor of type " + getClass().getSimpleName() + " cannot return data as long array.");
   }

   /**
```
```diff
@@ -16,7 +16,7 @@
 // merge the libraries inside Facebook".  Well, the problem is that there
 // are some downstream applications which are at binary size limit, and
 // incorporating all of the extra code from libtorch would push them
-// over (admarket/adreview/service:adreviewservice, see also 
+// over (admarket/adreview/service:adreviewservice, see also
 // https://github.com/pytorch/pytorch/pull/29299)  So if you want to do that,
 // we have to fix all of the services like this.
 //
```
```diff
@@ -50,10 +50,10 @@ struct CAFFE2_API VariableHooksInterface {
   virtual const std::string& name(const Tensor&) const = 0;
 };

-C10_API void SetVariableHooks(VariableHooksInterface* hooks);
-C10_API VariableHooksInterface* GetVariableHooks();
+CAFFE2_API void SetVariableHooks(VariableHooksInterface* hooks);
+CAFFE2_API VariableHooksInterface* GetVariableHooks();

-struct C10_API VariableHooksRegisterer {
+struct CAFFE2_API VariableHooksRegisterer {
   explicit VariableHooksRegisterer(VariableHooksInterface* hooks) {
     SetVariableHooks(hooks);
   }
```
```diff
@@ -41,7 +41,7 @@ Tensor cosine_embedding_loss(const Tensor& input1, const Tensor& input2, const T
   auto denom = (mag_square1 * mag_square2).sqrt_();
   auto cos = prod_sum / denom;

-  auto zeros = at::zeros_like(target, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
+  auto zeros = at::zeros_like(cos, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
   auto pos = 1 - cos;
   auto neg = (cos - margin).clamp_min_(0);
   auto output_pos = at::where(target == 1, pos, zeros);
@@ -77,8 +77,8 @@ Tensor margin_ranking_loss(const Tensor& input1, const Tensor& input2, const Ten
 }

 Tensor kl_div(const Tensor& input, const Tensor& target, int64_t reduction) {
-  auto zeros = at::zeros_like(target, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
   auto output_pos = target * (at::log(target) - input);
+  auto zeros = at::zeros_like(output_pos, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
   auto output = at::where(target > 0, output_pos, zeros);
   return apply_loss_reduction(output, reduction);
 }
```
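The fix matters because `at::where` expects its two value tensors to agree in type and shape, and `target` need not match the elementwise result `target * (log(target) - input)` once type promotion or broadcasting is involved; building the zeros from the computed output guarantees agreement. The same masking trick written against the public torch API, for illustration:

```python
import torch

def kl_div_pointwise(input, target):
    # Same masking trick as the C++ hunk: where target > 0 use the
    # pointwise KL term, elsewhere contribute exact zeros.
    output_pos = target * (target.log() - input)
    zeros = torch.zeros_like(output_pos)  # matches the result dtype/shape
    return torch.where(target > 0, output_pos, zeros)

inp = torch.log_softmax(torch.randn(3, 5, dtype=torch.double), dim=1)
tgt = torch.softmax(torch.randn(3, 5), dim=1)  # float32 target, float64 input
out = kl_div_pointwise(inp, tgt)
print(out.dtype)  # torch.float64; zeros_like(target) would have been float32
```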
```diff
@@ -324,13 +324,14 @@ Tensor unflatten(const Tensor& self, int64_t dim, IntArrayRef sizes, DimnameList
       "up to the size of dim ", dim, " (", self.names()[dim], ": ", self.size(dim),
       ") in Tensor", self.names());

+  int64_t dim_wrap = maybe_wrap_dim(dim, self.dim());
   auto outnames = self.names().vec();
-  outnames.erase(outnames.begin() + dim);
-  outnames.insert(outnames.begin() + dim, names.begin(), names.end());
+  outnames.erase(outnames.begin() + dim_wrap);
+  outnames.insert(outnames.begin() + dim_wrap, names.begin(), names.end());

   auto new_sizes = self.sizes().vec();
-  new_sizes.erase(new_sizes.begin() + dim);
-  new_sizes.insert(new_sizes.begin() + dim, sizes.begin(), sizes.end());
+  new_sizes.erase(new_sizes.begin() + dim_wrap);
+  new_sizes.insert(new_sizes.begin() + dim_wrap, sizes.begin(), sizes.end());

   Tensor result;
   {
```
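`maybe_wrap_dim` converts a possibly negative dimension index into its canonical non-negative form before it is used as a vector offset; without the wrap, a caller passing `dim=-1` would index `outnames.begin() + (-1)`. The wrapping rule, sketched in Python (the error message is illustrative):

```python
def maybe_wrap_dim(dim: int, ndim: int) -> int:
    # Same contract as the C++ helper: accept dim in [-ndim, ndim - 1]
    # and return the equivalent non-negative index.
    if not -ndim <= dim <= ndim - 1:
        raise IndexError(f"dim {dim} out of range for {ndim}-d tensor")
    return dim + ndim if dim < 0 else dim

assert maybe_wrap_dim(-1, 4) == 3
assert maybe_wrap_dim(2, 4) == 2
```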
```diff
@@ -138,5 +138,14 @@ Tensor max_pool3d(
       self, kernel_size, stride, padding, dilation, ceil_mode);
   return std::get<0>(output_and_indices);
 }
+
+Tensor _test_optional_float(const Tensor & self, c10::optional<double> scale) {
+  if (scale.has_value()) {
+    return at::full({}, scale.value(), self.options());
+  } else {
+    return at::empty({0}, self.options());
+  }
+}
+
 } // namespace native
 } // namespace at
```
```diff
@@ -134,6 +134,7 @@ std::vector<Tensor> where(const Tensor& condition) {
 }

 Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& other) {
+  TORCH_CHECK(self.dtype() == other.dtype(), "expected scalar type ", self.dtype(), " but found ", other.dtype());
   Tensor ret = at::empty(self.sizes(), self.options());
   AT_DISPATCH_ALL_TYPES_AND_COMPLEX(ret.scalar_type(), "where_cpu", [&] {
     where_cpu<scalar_t>(ret, condition, self, other);
```
```diff
@@ -42,7 +42,22 @@ static void copy_kernel(TensorIterator& iter, bool non_blocking) {
     AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, dtype, "copy_", [&] {
       using dest_t = scalar_t;
       AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, iter.dtype(1), "copy_", [&] {
-        cpu_kernel(iter, c10::static_cast_with_inter_type<dest_t, scalar_t>);
+        // Note (@zasdfgbnm):
+        //
+        // The code below can not be simplified as
+        //    cpu_kernel(iter, c10::static_cast_with_inter_type<dest_t, scalar_t>::apply);
+        //
+        // because this would force the compiler to instantiate the inline function and generate a function call in the loop
+        // instead of inlining it, making all the optimizations like vectorization impossible.
+        // You can verify this by looking the the symbols of `libtorch_cpu.so`:
+        //
+        //    readelf -Ws libtorch_cpu.so | grep static_cast_with_inter_type
+        //
+        // If done correctly, the above command should have no output.
+        //
+        // See: https://github.com/pytorch/pytorch/issues/31271
+        cpu_kernel(iter, [](scalar_t src) -> dest_t {
+          return c10::static_cast_with_inter_type<dest_t, scalar_t>(src); });
       });
     });
   }
```
```diff
@@ -171,8 +171,9 @@ void avg_pool2d_out_cuda_template(

   output.resize_({nbatch, nInputPlane, outputHeight, outputWidth});

-  const int count = safe_downcast<int, int64_t>(output.numel());
-  const int num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+  const int32_t count = safe_downcast<int32_t, int64_t>(output.numel());
+  const uint32_t  num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+  const uint32_t num_blocks = cuda::ATenCeilDiv<uint32_t>(count, num_threads);

   if (divisor_override.has_value()) {
     AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(),
@@ -184,7 +185,7 @@ void avg_pool2d_out_cuda_template(
         scalar_t *input_data = input.data_ptr<scalar_t>();

         avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, false, true>
-            <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+            <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
             count,
                 input_data,
                 nbatch,
@@ -209,7 +210,7 @@ void avg_pool2d_out_cuda_template(
          scalar_t *input_data = input.data_ptr<scalar_t>();

          avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, true, false>
-              <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+              <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
              count,
                  input_data,
                  nbatch,
@@ -233,7 +234,7 @@ void avg_pool2d_out_cuda_template(
          scalar_t *input_data = input.data_ptr<scalar_t>();

          avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, false, false>
-              <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+              <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
              count,
                  input_data,
                  nbatch,
@@ -249,10 +250,8 @@ void avg_pool2d_out_cuda_template(
     }
   }

-  
-  TORCH_CHECK(cudaGetLastError() == cudaSuccess,
-     "avg_pool2d_out_cuda_frame failed with error code ",
-     cudaGetLastError());
+
+  THCudaCheck(cudaGetLastError());

   if (input.ndimension() == 3) {
     output.resize_({nInputPlane, outputHeight, outputWidth});
@@ -322,8 +321,9 @@ Tensor& avg_pool2d_backward_out_cuda_template(

   gradInput.resize_as_(input);

-  const int count =  safe_downcast<int, int64_t>(input.numel());
-  const int num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+  const int32_t count =  safe_downcast<int32_t, int64_t>(input.numel());
+  const uint32_t num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+  const uint32_t num_blocks = cuda::ATenCeilDiv<uint32_t>(count, num_threads);

   if (divisor_override.has_value()) {
     AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(),
@@ -335,7 +335,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
         scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>();

         avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, false, true>
-            <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+            <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
             count,
                 gradOutput_data,
                 nbatch,
@@ -360,7 +360,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
          scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>();

          avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, true, false>
-            <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+            <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
               count,
               gradOutput_data,
               nbatch,
@@ -384,7 +384,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
          scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>();

          avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, false, false>
-            <<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+            <<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
               count,
               gradOutput_data,
               nbatch,
@@ -400,9 +400,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
     }
   }

-  TORCH_CHECK(cudaGetLastError() == cudaSuccess,
-    "avg_pool2d_backward_out_cuda failed with error code ",
-    cudaGetLastError());
+  THCudaCheck(cudaGetLastError());

   return gradInput;
 }
```
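Hoisting the block count into `num_blocks` makes the launch configuration explicit: the grid size is a ceiling division of the element count (downcast to 32 bits via `safe_downcast`, which checks the value fits) by the thread count. The arithmetic, in Python terms:

```python
def aten_ceil_div(a: int, b: int) -> int:
    # Integer ceiling division, as cuda::ATenCeilDiv computes it.
    return (a + b - 1) // b

count = 1_000_000        # e.g. output.numel(), already verified to fit int32
num_threads = 1024       # maxThreadsPerBlock capped at 1024
num_blocks = aten_ceil_div(count, num_threads)
assert num_blocks == 977             # 977 * 1024 >= 1_000_000
assert (num_blocks - 1) * num_threads < count  # no smaller grid covers it
```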
@ -22,16 +22,88 @@ static inline __host__ __device__ T powi(T a, T b) {
  return result;
}

// SFINAE doesn't work well with NVCC under Windows for math functions like pow and sqrt.
// So we need to define the functions with the explicit function signatures.
// As for pow, the following signatures are defined as the device function:
//   pow(float, int)
//   pow(double, int)
//   pow(float, float)
//   pow(double, double)
// As for sqrt, the following signatures are defined as the device function:
//   sqrt(float)
//   sqrt(double)
// As for inverse sqrt, we must define it explicitly in MSVC, otherwise the static cast will be
// applied to the result of the inline function, and thus the result is incorrect.
//   e.g. if we use 1.0 / sqrt(2) for 2 ^ (-0.5) in MSVC, we get
//          int(2 ^ (-0.5)) = int(1.0 / sqrt(2)) = int(1.0 / int(1.414)) = int(1.0 / 1) = 1
//        However, the correct result is
//          int(2 ^ (-0.5)) = int(1.0 / 1.414) = 0
#ifdef _MSC_VER
// Functions for pow
// pow for at::Half
static inline __host__ __device__ at::Half pow_(at::Half base, at::Half exp) {
  return static_cast<at::Half>(std::pow(static_cast<float>(base), static_cast<float>(exp)));
}
// pow (floating, floating/int)
template <typename Base_type, typename Exp_type>
static inline __host__ __device__ typename std::enable_if<std::is_floating_point<Base_type>::value && (std::is_same<Base_type, Exp_type>::value || std::is_same<Exp_type, int>::value), Base_type>::type
  pow_(Base_type base, Exp_type exp) {
  return std::pow(base, exp);
}
// pow (integral, integral)
template <typename Base_type, typename Exp_type>
static inline __host__ __device__ typename std::enable_if<std::is_integral<Base_type>::value && std::is_same<Base_type, Exp_type>::value, Base_type>::type
  pow_(Base_type base, Exp_type exp) {
  return powi(base, exp);
}
// pow (Otherwise)
template <typename Base_type, typename Exp_type>
static inline __host__ __device__ typename std::enable_if<!std::is_same<Base_type, Exp_type>::value && !std::is_same<Exp_type, int>::value, Base_type>::type
  pow_(Base_type base, Exp_type exp) {
  return static_cast<Base_type>(std::pow(static_cast<double>(base), static_cast<double>(exp)));
}
// Functions for sqrt
// sqrt (floating)
template <typename T>
static inline __host__ __device__ T sqrt(T x) {
static inline __host__ __device__ typename std::enable_if<std::is_floating_point<T>::value, T>::type sqrt_(T x) {
  return std::sqrt(x);
}
// sqrt (integral)
template <typename T>
static inline __host__ __device__ typename std::enable_if<!std::is_floating_point<T>::value, T>::type sqrt_(T x) {
  return static_cast<T>(std::sqrt(static_cast<double>(x)));
}
// Function for inverse sqrt
// invsqrt (floating)
template <typename T>
static inline __host__ __device__ typename std::enable_if<std::is_floating_point<T>::value, T>::type invsqrt_(T x) {
  return 1.0 / std::sqrt(x);
}
// invsqrt (integral)
template <typename T>
static inline __host__ __device__ typename std::enable_if<!std::is_floating_point<T>::value, T>::type invsqrt_(T x) {
  return static_cast<T>(1.0 / std::sqrt(static_cast<double>(x)));
}
#else
template <typename Base_type, typename Exp_type>
static inline __host__ __device__ Base_type pow_(Base_type base, Exp_type exp) {
  return std::pow(base, exp);
}
template <typename T>
static inline __host__ __device__ T sqrt_(T x) {
  return ::sqrt(x);
}
template <typename T>
static inline __host__ __device__ T invsqrt_(T x) {
  return 1.0 / ::sqrt(x);
}
#endif

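The MSVC-only overload set above exists to keep the intermediate math in floating point. The truncation pitfall the comment walks through can be reproduced in plain host C++ (a sketch, independent of the CUDA build):

#include <cassert>
#include <cmath>

int main() {
  // Correct order: divide in floating point first, truncate once at the end.
  int good = static_cast<int>(1.0 / std::sqrt(2.0));  // int(0.707...) == 0
  // Broken order: truncating sqrt's result early, as a wrongly resolved
  // integer overload would, turns 1.414... into 1 before the division runs.
  int bad = static_cast<int>(1.0 / static_cast<int>(std::sqrt(2.0)));  // int(1.0 / 1) == 1
  assert(good == 0 && bad == 1);
  return 0;
}
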
void pow_tensor_tensor_kernel(TensorIterator& iter) {
  if (isFloatingType(iter.dtype())) {
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(iter.dtype(), "pow_cuda", [&]() {
      gpu_kernel(iter, []GPU_LAMBDA(scalar_t base, scalar_t exp) -> scalar_t {
        return std::pow(base, exp);
        return pow_(base, exp);
      });
    });
  } else {
@ -49,7 +121,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter,
  const auto d_exp = static_cast<double>(exp);
  if (d_exp == 0.5) {
    gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
      return ::sqrt(base);
      return sqrt_(base);
    });
  } else if (d_exp == 2) {
    gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
@ -61,7 +133,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter,
    });
  } else if (d_exp == -0.5) {
    gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
      return 1.0 / ::sqrt(base);
      return invsqrt_(base);
    });
  } else if (d_exp == -1) {
    gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
@ -73,7 +145,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter,
    });
  } else {
    gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
      return std::pow(base, exp);
      return pow_(base, exp);
    });
  }
}

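The scalar-exponent path above routes a handful of common exponents to cheaper kernels before falling back to pow_. The branch structure, reduced to a host-side miniature (illustrative only; the real code dispatches GPU lambdas):

#include <cassert>
#include <cmath>

// Fast paths for x^e mirroring the kernel's special cases.
static double pow_fast(double x, double e) {
  if (e == 0.5) return std::sqrt(x);        // x^0.5
  if (e == 2.0) return x * x;               // x^2
  if (e == -0.5) return 1.0 / std::sqrt(x); // x^-0.5
  if (e == -1.0) return 1.0 / x;            // x^-1
  return std::pow(x, e);                    // general case
}

int main() {
  assert(pow_fast(4.0, 0.5) == 2.0);
  assert(pow_fast(3.0, 2.0) == 9.0);
  assert(pow_fast(4.0, -0.5) == 0.5);
  assert(pow_fast(8.0, -1.0) == 0.125);
  return 0;
}
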
@ -52,7 +52,7 @@ Tensor& linspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step
  } else if (steps == 1) {
    r.fill_(start);
  } else {
    AT_DISPATCH_FLOATING_TYPES(r.scalar_type(), "linspace_cuda", [&]() {
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(r.scalar_type(), "linspace_cuda", [&]() {
      scalar_t scalar_start = start.to<scalar_t>();
      scalar_t scalar_end = end.to<scalar_t>();
      scalar_t step = (scalar_end - scalar_start) / static_cast<scalar_t>(steps - 1);
@ -84,7 +84,7 @@ Tensor& logspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step
  } else if (steps == 1) {
    r.fill_(std::pow(base, start.to<double>()));
  } else {
    AT_DISPATCH_FLOATING_TYPES(r.scalar_type(), "logspace_cuda", [&]() {
    AT_DISPATCH_FLOATING_TYPES_AND_HALF(r.scalar_type(), "logspace_cuda", [&]() {
      scalar_t scalar_base = static_cast<scalar_t>(base);
      scalar_t scalar_start = start.to<scalar_t>();
      scalar_t scalar_end = end.to<scalar_t>();

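The step computed in both hunks is the usual (end - start) / (steps - 1), which lands the final element exactly on end. A quick host-side check with illustrative values:

#include <cassert>

int main() {
  const double start = 0.0, end = 1.0;
  const long steps = 5;
  const double step = (end - start) / static_cast<double>(steps - 1);  // 0.25
  // The endpoints are hit exactly: start + (steps - 1) * step == end.
  assert(start + (steps - 1) * step == end);
  return 0;
}
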
@ -6068,6 +6068,9 @@
    CPU: replication_pad3d_backward_cpu
    CUDA: replication_pad3d_backward_cuda

- func: _test_optional_float(Tensor self, *, float? scale=None) -> Tensor
  variants: function

- func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)
  python_module: nn
  dispatch:

@ -77,6 +77,8 @@ def type_argument_translations(arg):
    # Enables float by translating to legacy double.
    elif t == 'float':
        t = 'double'
    elif t == 'float?':
        t = 'double?'
    # Enables str by translating to legacy std::string.
    elif t == 'str':
        t = 'std::string'

caffe2/operators/alias_with_name.cc (new file, 25 lines)
@ -0,0 +1,25 @@
#include "caffe2/operators/alias_with_name.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(AliasWithName, AliasWithNameOp<CPUContext>);

OPERATOR_SCHEMA(AliasWithName)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
Similar to AliasOp, but stores the alias name as an operator argument.
)DOC")
    .Arg("name", "name of the alias")
    .Arg("is_backward", "whether or not to alias forward or backward")
    .Input(0, "input", "Input tensor whose storage will be shared.")
    .Output(0, "output", "Tensor of same shape as input, sharing its storage.");

} // namespace caffe2

C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    AliasWithName,
    "_caffe2::AliasWithName(Tensor input, str name, bool is_backward = False) -> (Tensor output)",
    caffe2::AliasWithNameOp<caffe2::CPUContext>);
caffe2/operators/alias_with_name.cu (new file, 12 lines)
@ -0,0 +1,12 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/alias_with_name.h"

namespace caffe2 {

REGISTER_CUDA_OPERATOR(AliasWithName, AliasWithNameOp<CUDAContext>);

} // namespace caffe2

C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(
    AliasWithName,
    caffe2::AliasWithNameOp<caffe2::CUDAContext>);
caffe2/operators/alias_with_name.h (new file, 46 lines)
@ -0,0 +1,46 @@
#ifndef ALIAS_WITH_NAME_OP_H_
#define ALIAS_WITH_NAME_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include "caffe2/core/operator.h"

C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(AliasWithName)

namespace caffe2 {

template <class Context>
class AliasWithNameOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit AliasWithNameOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        name_(this->template GetSingleArgument<std::string>(
            "name",
            "invalid_name")),
        is_backward_(
            this->template GetSingleArgument<bool>("is_backward", false)) {
    CAFFE_ENFORCE(
        OperatorBase::HasArgument("name"), "You have to specify argument name");
  }

  bool RunOnDevice() override {
    auto& input = Input(0);
    CAFFE_ENFORCE_GE(input.numel(), 0, "Tensor is not initialized");

    // OutputTensorAlias(0, input) no longer works here because this is a
    // "newstyle" operator, so set the aliased output tensor directly.
    OperatorBase::SetOutputTensor(0, input.Alias());
    return true;
  }

 protected:
  std::string name_;
  bool is_backward_;
};

} // namespace caffe2

#endif // ALIAS_WITH_NAME_OP_H_
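The operator works because Tensor::Alias() shares the underlying storage rather than copying it, which is what the in-place test further down relies on. A plain-C++ analogue of that aliasing contract (a sketch, not the caffe2 implementation):

#include <cassert>
#include <memory>
#include <vector>

int main() {
  // Two handles to one buffer, like an aliased tensor and its source.
  auto storage = std::make_shared<std::vector<float>>(std::vector<float>{3, 42});
  auto alias = storage;
  (*storage)[1] = 6;         // a write through one handle...
  assert((*alias)[1] == 6);  // ...is visible through the other
  return 0;
}
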
caffe2/operators/batch_permutation_op.cc (new file, 169 lines)
@ -0,0 +1,169 @@
#include "caffe2/operators/batch_permutation_op.h"

#include <cstring>
#include <vector>

#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

namespace caffe2 {

template <bool forwards>
void batch_permutation_loop(
    const int N,
    const int K,
    const float* src,
    const int* indices,
    float* dst) {
  long numBytes = K * sizeof(float);
  if (forwards) {
#ifdef _OPENMP
#if (_OPENMP >= 201307)
#pragma omp parallel for simd
#else
#pragma omp parallel for
#endif
#endif
    for (int n = 0; n < N; n++) {
      int origIdx = n * K;
      int permuteIdx = indices[n] * K;
      std::memcpy(dst + origIdx, src + permuteIdx, numBytes);
    }
  } else {
    std::vector<int> backward_indices(N);
    for (size_t i = 0; i < N; ++i) {
      backward_indices[indices[i]] = i;
    }
    for (int n = 0; n < N; n++) {
      int permuteIdx = n * K;
      int origIdx = backward_indices[n] * K;
      std::memcpy(dst + permuteIdx, src + origIdx, numBytes);
    }
  }
}
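The backward branch above first builds backward_indices, the inverse of the forward permutation, so the copy loop can run as a straight gather. The inversion itself is one pass (a standalone sketch with an illustrative permutation):

#include <cassert>
#include <vector>

int main() {
  const std::vector<int> indices = {2, 0, 1, 3};
  std::vector<int> inverse(indices.size());
  for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
    inverse[indices[i]] = i;  // forward output row i came from input row indices[i]
  }
  // Composing a permutation with its inverse gives the identity.
  for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
    assert(inverse[indices[i]] == i);
  }
  return 0;
}
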

template <>
bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& indices = Input(1);

  CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
  CAFFE_ENFORCE(
      X.dim32(0) == indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(X.dim32(0), 0);
  batch_permutation_loop<true>(
      X.dim32(0),
      X.numel() / X.dim32(0),
      X.data<float>(),
      indices.data<int>(),
      Y->mutable_data<float>());
  return true;
}

template <>
bool BatchPermutationGradientOp<float, CPUContext>::RunOnDevice() {
  auto& indices = Input(0);
  auto& dY = Input(1);

  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(dY.dim32(0), 0);
  batch_permutation_loop<false>(
      dY.dim32(0),
      dY.numel() / dY.dim32(0),
      dY.data<float>(),
      indices.data<int>(),
      dX->mutable_data<float>());
  return true;
}

#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
    BatchPermutation,
    IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
#endif

REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CPUContext>);

// Input: X, indices; Output: Y
OPERATOR_SCHEMA(BatchPermutation)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Batch permutation of an input tensor X given input indices. The first dimension
of X equals the batch size N. The indices store a permutation of N.
The output Y is a tensor of the same shape as X, with data re-ordered within
the batch according to the indices.

Example of batch permutation on a 2-D tensor with batch size 4:
  X = [
    [1, 5, 2, 3, 4, 6, 0],
    [4, 3, 3, 5, 2, 3, 1],
    [2, 2, 3, 6, 0, 0, 1],
    [0, 0, 1, 1, 2, 2, 3]
  ]
  indices = [2, 0, 1, 3]
  Y = [
    [2, 2, 3, 6, 0, 0, 1],
    [1, 5, 2, 3, 4, 6, 0],
    [4, 3, 3, 5, 2, 3, 1],
    [0, 0, 1, 1, 2, 2, 3]
  ]

Example of batch permutation on a 3-D tensor with batch size 4:
  X = [
    [[1, 5, 2], [3, 4, 6, 0]],
    [[4, 3, 3], [5, 2, 3, 1]],
    [[2, 2, 3], [6, 0, 0, 1]],
    [[0, 0, 1], [1, 2, 2, 3]]
  ]
  indices = [2, 0, 1, 3]
  Y = [
    [[2, 2, 3], [6, 0, 0, 1]],
    [[1, 5, 2], [3, 4, 6, 0]],
    [[4, 3, 3], [5, 2, 3, 1]],
    [[0, 0, 1], [1, 2, 2, 3]]
  ]
)DOC")
    .Input(0, "X", "Input tensor, where 1st dimension equals batch size")
    .Input(1, "indices", "Input indices of batch to permute")
    .Output(0, "Y", "Output permuted tensor");
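In the notation of the doc above, the forward pass is a plain batch gather: row n of Y is row indices[n] of X. A standalone check against the first two columns of the doc's 2-D example:

#include <cassert>
#include <vector>

int main() {
  const int N = 4, K = 2;
  // First two columns of the doc's 2-D example, flattened row-major.
  const std::vector<float> X = {1, 5, 4, 3, 2, 2, 0, 0};
  const std::vector<int> indices = {2, 0, 1, 3};
  std::vector<float> Y(N * K);
  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) {
      Y[n * K + k] = X[indices[n] * K + k];  // gather row indices[n] into row n
    }
  }
  assert(Y[0] == 2 && Y[1] == 2);  // Y row 0 == X row 2
  assert(Y[2] == 1 && Y[3] == 5);  // Y row 1 == X row 0
  return 0;
}
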
// Input: indices, dY (aka "gradOutput"); Output: dX (aka "gradInput")
OPERATOR_SCHEMA(BatchPermutationGradient).NumInputs(2).NumOutputs(1);

class GetBatchPermutationGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "BatchPermutationGradient",
        "",
        vector<string>{I(1), GO(0)},
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient);

} // namespace caffe2

using BatchPermutationOpFloatCPU =
    caffe2::BatchPermutationOp<float, caffe2::CPUContext>;

C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    BatchPermutation,
    "_caffe2::BatchPermutation(Tensor X, Tensor indices) -> Tensor",
    BatchPermutationOpFloatCPU);
caffe2/operators/batch_permutation_op.cu (new file, 113 lines)
@ -0,0 +1,113 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/batch_permutation_op.h"

namespace caffe2 {

namespace {
template <bool forward>
__global__ void BatchPermutationKernel(
    int N,
    int K,
    const float* src,
    const int* indices,
    float* dst) {
  if (forward) {
    CUDA_1D_KERNEL_LOOP(index, N * K) {
      int k = index % K;
      int n = index / K;
      int idx = indices[n];
      CUDA_KERNEL_ASSERT(idx >= 0);
      CUDA_KERNEL_ASSERT(idx < N);
      dst[index] = src[idx * K + k];
    }
  } else {
    CUDA_1D_KERNEL_LOOP(index, N * K) {
      int k = index % K;
      int n = index / K;

      // NOTE: an alternative implementation if we want to align the index with
      // the output tensor (rather than the input tensor).
      // int idx = -1;
      // for (size_t i = 0; i < N; ++i) {
      //   if (indices[i] == n) {
      //     idx = i;
      //   }
      // }
      // CUDA_KERNEL_ASSERT(idx >= 0);
      // CUDA_KERNEL_ASSERT(idx < N);
      // dst[index] = src[idx * K + k];

      int idx = indices[n];
      CUDA_KERNEL_ASSERT(idx >= 0);
      CUDA_KERNEL_ASSERT(idx < N);
      dst[idx * K + k] = src[index];
    }
  }
}
} // namespace
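CUDA_1D_KERNEL_LOOP hands each thread flat indices over N * K elements; the kernel recovers the (row, column) pair with one division and one modulo. The decomposition round-trips exactly (a host-side check with illustrative numbers):

#include <cassert>

int main() {
  const int K = 7;          // row width
  const int index = 38;     // flat position handed to a thread
  const int n = index / K;  // row 5
  const int k = index % K;  // column 3
  assert(n * K + k == index);  // (n, k) reassembles to the flat index
  return 0;
}
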

template <>
bool BatchPermutationOp<float, CUDAContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& indices = Input(1);

  CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
  CAFFE_ENFORCE(
      X.dim32(0) == indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(X.dim32(0), 0);
  BatchPermutationKernel<true>
      <<<CAFFE_GET_BLOCKS(X.numel()),
         CAFFE_CUDA_NUM_THREADS,
         0,
         context_.cuda_stream()>>>(
          X.dim32(0),
          X.numel() / X.dim32(0),
          X.data<float>(),
          indices.data<int>(),
          Y->mutable_data<float>());

  return true;
}

template <>
bool BatchPermutationGradientOp<float, CUDAContext>::RunOnDevice() {
  auto& indices = Input(0);
  auto& dY = Input(1);
  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(dY.dim32(0), 0);
  BatchPermutationKernel<false>
      <<<CAFFE_GET_BLOCKS(dY.numel()),
         CAFFE_CUDA_NUM_THREADS,
         0,
         context_.cuda_stream()>>>(
          dY.dim32(0),
          dY.numel() / dY.dim32(0),
          dY.data<float>(),
          indices.data<int>(),
          dX->mutable_data<float>());

  return true;
}

REGISTER_CUDA_OPERATOR(
    BatchPermutation,
    BatchPermutationOp<float, CUDAContext>);
REGISTER_CUDA_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CUDAContext>);
} // namespace caffe2

using BatchPermutationOpFloatCUDA =
    caffe2::BatchPermutationOp<float, caffe2::CUDAContext>;

C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(BatchPermutation, BatchPermutationOpFloatCUDA);
caffe2/operators/batch_permutation_op.h (new file, 37 lines)
@ -0,0 +1,37 @@
#ifndef BATCHPERMUTATION_OP_H_
#define BATCHPERMUTATION_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(BatchPermutation)

namespace caffe2 {

template <typename T, class Context>
class BatchPermutationOp final : public Operator<Context> {
 public:
  template <class... Args>
  explicit BatchPermutationOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice();
};

template <typename T, class Context>
class BatchPermutationGradientOp final : public Operator<Context> {
 public:
  BatchPermutationGradientOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice();
};

} // namespace caffe2

#endif // BATCHPERMUTATION_OP_H_
caffe2/operators/batch_permutation_op_gpu_test.cc (new file, 269 lines)
@ -0,0 +1,269 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/flags.h"
#include "caffe2/operators/batch_permutation_op.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
#include "gtest/gtest.h"

namespace caffe2 {
namespace {

// Add the vector as an input to a Workspace depending on the context of the
// workspace

template <typename T>
void AddInputCPU(
    const vector<int64_t>& shape,
    const vector<T>& values,
    const string& name,
    Workspace* ws) {
  Blob* blob = ws->CreateBlob(name);
  auto* tensor = BlobGetMutableTensor(blob, CPU);
  tensor->Resize(shape);
  EigenVectorMap<T> tensor_vec(tensor->mutable_data<T>(), tensor->numel());
  tensor_vec.array() = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>{
      values.data(), static_cast<int>(values.size())};
}

template <typename T>
void AddInputGPU(
    const vector<int64_t>& shape,
    const vector<T>& values,
    const string& name,
    Workspace* ws) {
  Tensor tmp(shape, CPU);
  EigenVectorMap<T> tmp_vec(tmp.mutable_data<T>(), tmp.numel());
  tmp_vec.array() = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>{
      values.data(), static_cast<int>(values.size())};

  Blob* blob = ws->CreateBlob(name);
  auto* tensor = BlobGetMutableTensor(blob, CUDA);
  tensor->CopyFrom(tmp);
}

// Overload 4 different signatures for AddInput because clang does not allow
// template <typename T>
// void AddInput<CPUContext>(...) {...}

template <typename T, class Context>
void AddInput(
    const vector<int64_t>& shape,
    const vector<T>& values,
    const string& name,
    Workspace* ws);

template <>
void AddInput<int, CPUContext>(
    const vector<int64_t>& shape,
    const vector<int>& values,
    const string& name,
    Workspace* ws) {
  AddInputCPU<int>(shape, values, name, ws);
}

template <>
void AddInput<float, CPUContext>(
    const vector<int64_t>& shape,
    const vector<float>& values,
    const string& name,
    Workspace* ws) {
  AddInputCPU<float>(shape, values, name, ws);
}

template <>
void AddInput<int, CUDAContext>(
    const vector<int64_t>& shape,
    const vector<int>& values,
    const string& name,
    Workspace* ws) {
  AddInputGPU<int>(shape, values, name, ws);
}

template <>
void AddInput<float, CUDAContext>(
    const vector<int64_t>& shape,
    const vector<float>& values,
    const string& name,
    Workspace* ws) {
  AddInputGPU<float>(shape, values, name, ws);
}

template <class Context>
DeviceTypeProto GetDeviceType() {
  return PROTO_CPU;
}
template <>
DeviceTypeProto GetDeviceType<CUDAContext>() {
  return PROTO_CUDA;
}

// Create a BatchPermutationOp with the given inputs (actual values are
// generated sequentially) and run it
template <class Context>
void CreateAndRun(
    TensorCPU* outResult,
    int N,
    vector<int64_t>& shape,
    vector<float>& features,
    vector<int> indices) {
  Workspace ws;

  AddInput<float, Context>(shape, features, "X", &ws);
  AddInput<int, Context>(vector<int64_t>{N}, indices, "indices", &ws);

  OperatorDef def;
  def.set_name("test");
  def.set_type("BatchPermutation");
  def.add_input("X");
  def.add_input("indices");
  def.add_output("Y");
  def.mutable_device_option()->set_device_type(GetDeviceType<Context>());
  unique_ptr<OperatorBase> op = CreateOperator(def, &ws);

  EXPECT_NE(nullptr, op.get());
  EXPECT_TRUE(op->Run());

  Blob* Y_blob = ws.GetBlob("Y");
  EXPECT_NE(nullptr, Y_blob);

  auto& Y = Y_blob->Get<Tensor>();
  outResult->CopyFrom(Y);
}

// Create a BatchPermutationGradientOp with the given inputs (actual values
// are generated sequentially) and run it
template <class Context>
void CreateAndRunGradient(
    TensorCPU* outResult,
    int N,
    vector<int64_t>& shape,
    vector<float>& features,
    vector<int> indices) {
  Workspace ws;

  AddInput<float, Context>(shape, features, "dY", &ws);
  AddInput<int, Context>(vector<int64_t>{N}, indices, "indices", &ws);

  OperatorDef def;
  def.set_name("test");
  def.set_type("BatchPermutationGradient");
  def.add_input("indices");
  def.add_input("dY");
  def.add_output("dX");
  def.mutable_device_option()->set_device_type(GetDeviceType<Context>());
  unique_ptr<OperatorBase> op = CreateOperator(def, &ws);

  EXPECT_NE(nullptr, op.get());
  EXPECT_TRUE(op->Run());

  Blob* Y_blob = ws.GetBlob("dX");
  EXPECT_NE(nullptr, Y_blob);

  auto& Y = Y_blob->Get<Tensor>();
  outResult->CopyFrom(Y);
}

// Check that the CPU and GPU implementations provide the exact same results
void CheckCPUGPUEqual(vector<int64_t> shape, vector<int> indices) {
  // Prepare input data
  EXPECT_GT(shape.size(), 1);
  int N = shape[0];
  int input_size = 1;
  for (auto k : shape) {
    input_size *= k;
  }
  int K = input_size / N;
  vector<float> features(input_size);
  std::iota(features.begin(), features.end(), 0);

  // CPU outputs
  Tensor y_cpu{CPU};
  Tensor y_cpu_grad{CPU};

  // CPU BatchPermutation
  CreateAndRun<CPUContext>(&y_cpu, N, shape, features, indices);

  // CPU BatchPermutationGradient
  CreateAndRunGradient<CPUContext>(&y_cpu_grad, N, shape, features, indices);

  // Check CPU output values
  for (auto i = 0; i < indices.size(); ++i) {
    for (auto k = 0; k < K; ++k) {
      EXPECT_NEAR(
          y_cpu.data<float>()[indices[i] * K + k], features[i * K + k], 1e4);
      EXPECT_NEAR(
          y_cpu_grad.data<float>()[i * K + k],
          features[indices[i] * K + k],
          1e4);
    }
  }

  if (!caffe2::HasCudaGPU()) {
    VLOG(2) << "No CudaGPU found. Skip GPU test." << std::endl;
    return;
  }

  // GPU outputs
  Tensor y_gpu{CPU};
  Tensor y_gpu_grad{CPU};

  // GPU BatchPermutation (run on the CUDA context so the comparison below
  // actually exercises the GPU kernel)
  CreateAndRun<CUDAContext>(&y_gpu, N, shape, features, indices);

  // Compare CPU and GPU BatchPermutation outputs
  EXPECT_EQ(y_cpu.sizes(), y_gpu.sizes());
  ConstEigenVectorMap<float> y_cpu_vec(y_cpu.data<float>(), y_cpu.numel());
  ConstEigenVectorMap<float> y_gpu_vec(y_gpu.data<float>(), y_gpu.numel());
  EXPECT_TRUE(y_cpu_vec.isApprox(y_gpu_vec));

  // GPU BatchPermutationGradient
  CreateAndRunGradient<CUDAContext>(&y_gpu_grad, N, shape, features, indices);

  // Check GPU outputs
  for (auto i = 0; i < indices.size(); ++i) {
    for (auto k = 0; k < K; ++k) {
      EXPECT_NEAR(
          y_gpu.data<float>()[indices[i] * K + k], features[i * K + k], 1e4);
      EXPECT_NEAR(
          y_gpu_grad.data<float>()[i * K + k],
          features[indices[i] * K + k],
          1e4);
    }
  }

  // Compare CPU and GPU BatchPermutationGradient outputs
  EXPECT_EQ(y_cpu_grad.sizes(), y_gpu_grad.sizes());
  ConstEigenVectorMap<float> y_cpu_vec_grad(
      y_cpu_grad.data<float>(), y_cpu_grad.numel());
  ConstEigenVectorMap<float> y_gpu_vec_grad(
      y_gpu_grad.data<float>(), y_gpu_grad.numel());
  EXPECT_TRUE(y_cpu_vec_grad.isApprox(y_gpu_vec_grad));
}

} // namespace

TEST(BatchPermutationTest, CHECKCPUGPUEqualGenericDimension) {
  auto t0 = std::chrono::high_resolution_clock::now();
  int batch_size = 8;
  int max_dimension = 6;
  vector<int64_t> shape = vector<int64_t>{batch_size};

  auto seed = std::chrono::system_clock::now().time_since_epoch().count();
  std::default_random_engine generator(seed);

  for (int i = 2; i < max_dimension; ++i) {
    std::uniform_int_distribution<> dis(1, i);
    shape.push_back(dis(generator));
    CheckCPUGPUEqual(shape, vector<int>{0, 1, 2, 3, 4, 5, 6, 7});
    CheckCPUGPUEqual(shape, vector<int>{7, 6, 5, 4, 3, 2, 1, 0});
    CheckCPUGPUEqual(shape, vector<int>{1, 3, 5, 7, 0, 2, 4, 6});
    CheckCPUGPUEqual(shape, vector<int>{4, 5, 6, 7, 0, 1, 2, 3});
    CheckCPUGPUEqual(shape, vector<int>{3, 1, 5, 7, 6, 2, 4, 0});
  }
  auto t1 = std::chrono::high_resolution_clock::now();
  double elapsed =
      std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
  VLOG(2) << "Time elapsed: " << elapsed << " ms" << std::endl;
  return;
}
} // namespace caffe2
@ -136,7 +136,7 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
    bias_data = Input(BIAS).template data<T>();
  }

  auto f = [&](Tensor* col_buffer) {
  auto f = [this, &filter_offset, &bias_data, &X, &buffer_shape, &N, &Xdata, &offset_data, &M, &filter, &output_image_size, &kernel_dim, &Ydata, &input_offset, &offset_offset, &output_offset] (Tensor* col_buffer) {
    col_buffer->Resize(buffer_shape);
    T* col_buffer_data = col_buffer->template mutable_data<T>();
    // Im2col, followed by gemm.

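The capture-list change above trades the blanket [&] for an explicit enumeration, so everything the lambda touches is spelled out at its definition; the diff does not state the motivation, but the two forms behave identically when the list is complete. A minimal illustration:

#include <cassert>

int main() {
  int a = 1, b = 2;
  auto implicit_all = [&] { return a + b; };        // captures whatever the body uses
  auto explicit_list = [&a, &b] { return a + b; };  // compiles only if the list covers the body
  assert(implicit_all() == explicit_list());
  return 0;
}
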
caffe2/python/operator_test/alias_with_name_test.py (new file, 31 lines)
@ -0,0 +1,31 @@
#!/usr/bin/env python3

import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
from caffe2.python import core, utils
from hypothesis import given


class TestAliasWithNameOp(hu.HypothesisTestCase):
    @given(
        shape=st.lists(st.integers(0, 5), min_size=1, max_size=3),
        dtype=st.sampled_from([np.float32, np.int64]),
        **hu.gcs
    )
    def test_alias_with_name_op(self, shape, dtype, dc, gc):
        test_input = (100 * np.random.random(shape)).astype(dtype)
        test_inputs = [test_input]

        alias_op = core.CreateOperator(
            "AliasWithName",
            ["input"],
            ["output"],
            device_option=gc,
        )
        alias_op.arg.add().CopyFrom(utils.MakeArgument("name", "whatever_name"))

        def reference_func(x):
            return (x,)

        self.assertReferenceChecks(gc, alias_op, test_inputs, reference_func)
@ -139,6 +139,7 @@ class TestMomentumSGD(serial.SerializedTestCase):
            [grad, m, lr, w, indices],
            sparse)

    @unittest.skip("Test is flaky, see https://github.com/pytorch/pytorch/issues/31368")
    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
    @given(n=st.integers(4, 8), nesterov=st.booleans(), **hu.gcs)
    def test_fp16momentum_sgd(self, n, nesterov, gc, dc):

@ -710,6 +710,15 @@ class TorchIntegration(hu.HypothesisTestCase):

        torch.testing.assert_allclose(torch.tensor(expected_output), actual_output)

    def test_alias_with_name_is_in_place(self):
        device = "cuda" if workspace.has_cuda_support else "cpu"
        x = torch.Tensor([3, 42]).to(device)
        y = torch.ops._caffe2.AliasWithName(x, "new_name")
        x[1] = 6
        torch.testing.assert_allclose(x, torch.Tensor([3, 6]).to(device))
        # y should also change because y is an alias of x
        torch.testing.assert_allclose(y, torch.Tensor([3, 6]).to(device))


if __name__ == '__main__':
    unittest.main()

@ -36,6 +36,7 @@ torch.optim

-  Vincent Quenneville-Belair (`vincentqb <https://github.com/vincentqb>`__)
-  Soumith Chintala (`soumith <https://github.com/soumith>`__)
-  Wanchao Liang (`wanchaol <https://github.com/wanchaol>`__)

Autograd Engine
~~~~~~~~~~~~~~~
@ -95,6 +96,20 @@ MKLDNN
-  Junjie Bai (`bddppq <https://github.com/bddppq>`__)
-  Yinghai Lu (`yinghai <https://github.com/yinghai>`__)

Mobile
~~~~~~

-  David Reiss (`dreiss <https://github.com/dreiss>`__)
-  Jiakai Liu (`ljk53 <https://github.com/ljk53>`__)

Quantization
~~~~~~~~~~~~

-  Raghuraman Krishnamoorthi (`dreiss <https://github.com/dreiss>`__)
-  Jerry Zhang (`jerryzh168 <https://github.com/jerryzh168>`__)
-  Lingyi Liu (`lly-zero-one <https://github.com/lly-zero-one>`__)
-  James Reed (`jamesr66a <https://github.com/jamesr66a>`__)

XLA
~~~

@ -138,6 +153,9 @@ ONNX <-> PyTorch
~~~~~~~~~~~~~~~~

-  Lu Fang (`houseroad <https://github.com/houseroad>`__)
-  Lara Haidar (`lara-hdr <https://github.com/lara-hdr>`__)
-  Spandan Tiwari (`spandantiwari <https://github.com/spandantiwari>`__)
-  Bowen Bao (`BowenBao <https://github.com/BowenBao>`__)

Windows
~~~~~~~

@ -46,7 +46,6 @@ extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.napoleon',

@ -17,13 +17,13 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.

   notes/*
   PyTorch on XLA Devices <http://pytorch.org/xla/>


.. toctree::
  :glob:
  :maxdepth: 1
  :caption: Community
   :maxdepth: 1
   :caption: Language Bindings

  community/*
   C++ API <https://pytorch.org/cppdocs/>
   Javadoc <https://pytorch.org/javadoc/>

.. toctree::
   :maxdepth: 1
@ -78,13 +78,13 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
   :caption: torchtext Reference

   torchtext <https://pytorch.org/text>


.. toctree::
   :maxdepth: 1
   :caption: Other Languages
  :glob:
  :maxdepth: 1
  :caption: Community

   C++ API <https://pytorch.org/cppdocs/>
   packages
  community/*

Indices and tables
==================

@ -314,13 +314,14 @@ The :class:`~torch.distributed.optim.DistributedOptimizer` operates as follows:
Simple end to end example
^^^^^^^^^^^^^^^^^^^^^^^^^

Putting it all together, a very simple end to end example using distributed
autograd and distributed optimizer is as follows:
Putting it all together, the following is a simple end to end example using
distributed autograd and the distributed optimizer. If the code is placed into a
file called "dist_autograd_simple.py", it can be run with the command
:code:`MASTER_ADDR="localhost" MASTER_PORT=29500 python dist_autograd_simple.py`:

.. code::

  import multiprocessing as mp
  from tempfile import NamedTemporaryFile
  import torch
  import torch.distributed.autograd as dist_autograd
  from torch.distributed import rpc
@ -330,52 +331,52 @@ autograd and distributed optimizer is as follows:
  def random_tensor():
      return torch.rand((3, 3), requires_grad=True)

  def _run_process(self_rank, dst_rank, file_name):
      self_name = "worker{}".format(self_rank)
  def _run_process(rank, dst_rank, world_size):
      name = "worker{}".format(rank)
      dst_name = "worker{}".format(dst_rank)

      # Initialize RPC.
      rpc.init_rpc(
          self_name=self_name,
          self_rank=self_rank,
          worker_name_to_id={"worker0": 0, "worker1": 1},
          init_method="file://{}".format(file_name),
          name=name,
          rank=rank,
          world_size=world_size
      )

      # Use a distributed autograd context.
      with dist_autograd.context() as context_id:
         # Forward pass (create references on remote nodes).
         rref1 = rpc.remote(dst_name, random_tensor)
         rref2 = rpc.remote(dst_name, random_tensor)
         loss = rref1.to_here() + rref2.to_here()
          # Forward pass (create references on remote nodes).
          rref1 = rpc.remote(dst_name, random_tensor)
          rref2 = rpc.remote(dst_name, random_tensor)
          loss = rref1.to_here() + rref2.to_here()

         # Backward pass (run distributed autograd).
         dist_autograd.backward([loss.sum()])
          # Backward pass (run distributed autograd).
          dist_autograd.backward([loss.sum()])

         # Build DistributedOptimizer.
         dist_optim = DistributedOptimizer(
           optim.SGD,
           [rref1, rref2],
           lr=0.05,
         )
          # Build DistributedOptimizer.
          dist_optim = DistributedOptimizer(
              optim.SGD,
              [rref1, rref2],
              lr=0.05,
          )

         # Run the distributed optimizer step.
         dist_optim.step()
          # Run the distributed optimizer step.
          dist_optim.step()

  def run_process(self_rank, dst_rank, file_name):
      _run_process(self_rank, dst_rank, file_name)
      rpc.wait_all_workers()
  def run_process(rank, dst_rank, world_size):
      _run_process(rank, dst_rank, world_size)
      rpc.shutdown()

  file_name = NamedTemporaryFile().name
  processes = []

  # Run two workers.
  for i in range(2):
      p = mp.Process(target=run_process, args=(i, (i + 1) % 2, file_name))
  # Run world_size workers.
  world_size = 2
  for i in range(world_size):
      p = mp.Process(target=run_process, args=(i, (i + 1) % 2, world_size))
      p.start()
      processes.append(p)

  for p in processes:
      p.join()


.. _RFC: https://github.com/pytorch/pytorch/issues/23110

@ -1,67 +0,0 @@
DType
=====

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public enum DType

   Codes representing tensor data types.

Enum Constants
--------------
FLOAT32
^^^^^^^

.. java:field:: public static final DType FLOAT32
   :outertype: DType

   Code for dtype torch.float32. \ :java:ref:`Tensor.dtype()`\

FLOAT64
^^^^^^^

.. java:field:: public static final DType FLOAT64
   :outertype: DType

   Code for dtype torch.float64. \ :java:ref:`Tensor.dtype()`\

INT32
^^^^^

.. java:field:: public static final DType INT32
   :outertype: DType

   Code for dtype torch.int32. \ :java:ref:`Tensor.dtype()`\

INT64
^^^^^

.. java:field:: public static final DType INT64
   :outertype: DType

   Code for dtype torch.int64. \ :java:ref:`Tensor.dtype()`\

INT8
^^^^

.. java:field:: public static final DType INT8
   :outertype: DType

   Code for dtype torch.int8. \ :java:ref:`Tensor.dtype()`\

UINT8
^^^^^

.. java:field:: public static final DType UINT8
   :outertype: DType

   Code for dtype torch.uint8. \ :java:ref:`Tensor.dtype()`\

Fields
------
jniCode
^^^^^^^

.. java:field:: final int jniCode
   :outertype: DType
@ -1,297 +0,0 @@

.. java:import:: java.util Locale

.. java:import:: java.util Map

IValue
======

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public class IValue

   Java representation of a TorchScript value, which is implemented as a tagged union that can be one of the supported types: https://pytorch.org/docs/stable/jit.html#types .

   Calling \ ``toX``\  methods for inappropriate types will throw \ :java:ref:`IllegalStateException`\ .

   \ ``IValue``\  objects are constructed with \ ``IValue.from(value)``\ , \ ``IValue.tupleFrom(value1, value2, ...)``\ , \ ``IValue.listFrom(value1, value2, ...)``\ , or one of the \ ``dict``\  methods, depending on the key type.

   Data is retrieved from \ ``IValue``\  objects with the \ ``toX()``\  methods. Note that \ ``str``\ -type IValues must be extracted with \ :java:ref:`toStr()`\ , rather than \ :java:ref:`toString()`\ .

   \ ``IValue``\  objects may retain references to objects passed into their constructors, and may return references to their internal state from \ ``toX()``\ .

Methods
-------
dictLongKeyFrom
^^^^^^^^^^^^^^^

.. java:method:: public static IValue dictLongKeyFrom(Map<Long, IValue> map)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Dict[int, V]``\ .

dictStringKeyFrom
^^^^^^^^^^^^^^^^^

.. java:method:: public static IValue dictStringKeyFrom(Map<String, IValue> map)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Dict[str, V]``\ .

from
^^^^

.. java:method:: public static IValue from(Tensor tensor)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Tensor``\ .

from
^^^^

.. java:method:: public static IValue from(boolean value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``bool``\ .

from
^^^^

.. java:method:: public static IValue from(long value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``int``\ .

from
^^^^

.. java:method:: public static IValue from(double value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``float``\ .

from
^^^^

.. java:method:: public static IValue from(String value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``str``\ .

isBool
^^^^^^

.. java:method:: public boolean isBool()
   :outertype: IValue

isBoolList
^^^^^^^^^^

.. java:method:: public boolean isBoolList()
   :outertype: IValue

isDictLongKey
^^^^^^^^^^^^^

.. java:method:: public boolean isDictLongKey()
   :outertype: IValue

isDictStringKey
^^^^^^^^^^^^^^^

.. java:method:: public boolean isDictStringKey()
   :outertype: IValue

isDouble
^^^^^^^^

.. java:method:: public boolean isDouble()
   :outertype: IValue

isDoubleList
^^^^^^^^^^^^

.. java:method:: public boolean isDoubleList()
   :outertype: IValue

isList
^^^^^^

.. java:method:: public boolean isList()
   :outertype: IValue

isLong
^^^^^^

.. java:method:: public boolean isLong()
   :outertype: IValue

isLongList
^^^^^^^^^^

.. java:method:: public boolean isLongList()
   :outertype: IValue

isNull
^^^^^^

.. java:method:: public boolean isNull()
   :outertype: IValue

isString
^^^^^^^^

.. java:method:: public boolean isString()
   :outertype: IValue

isTensor
^^^^^^^^

.. java:method:: public boolean isTensor()
   :outertype: IValue

isTensorList
^^^^^^^^^^^^

.. java:method:: public boolean isTensorList()
   :outertype: IValue

isTuple
^^^^^^^

.. java:method:: public boolean isTuple()
   :outertype: IValue

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(boolean... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[bool]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(long... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[int]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(double... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[float]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(Tensor... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[Tensor]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(IValue... array)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[T]``\ . All elements must have the same type.

optionalNull
^^^^^^^^^^^^

.. java:method:: public static IValue optionalNull()
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Optional``\  that contains no value.

toBool
^^^^^^

.. java:method:: public boolean toBool()
   :outertype: IValue

toBoolList
^^^^^^^^^^

.. java:method:: public boolean[] toBoolList()
   :outertype: IValue

toDictLongKey
^^^^^^^^^^^^^

.. java:method:: public Map<Long, IValue> toDictLongKey()
   :outertype: IValue

toDictStringKey
^^^^^^^^^^^^^^^

.. java:method:: public Map<String, IValue> toDictStringKey()
   :outertype: IValue

toDouble
^^^^^^^^

.. java:method:: public double toDouble()
   :outertype: IValue

toDoubleList
^^^^^^^^^^^^

.. java:method:: public double[] toDoubleList()
   :outertype: IValue

toList
^^^^^^

.. java:method:: public IValue[] toList()
   :outertype: IValue

toLong
^^^^^^

.. java:method:: public long toLong()
   :outertype: IValue

toLongList
^^^^^^^^^^

.. java:method:: public long[] toLongList()
   :outertype: IValue

toStr
^^^^^

.. java:method:: public String toStr()
   :outertype: IValue

toTensor
^^^^^^^^

.. java:method:: public Tensor toTensor()
   :outertype: IValue

toTensorList
^^^^^^^^^^^^

.. java:method:: public Tensor[] toTensorList()
   :outertype: IValue

toTuple
^^^^^^^

.. java:method:: public IValue[] toTuple()
   :outertype: IValue

tupleFrom
^^^^^^^^^

.. java:method:: public static IValue tupleFrom(IValue... array)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Tuple[T0, T1, ...]``\ .
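
A short sketch of the construction and extraction round trip described above; the shapes and values are illustrative only:

.. code-block:: java

   // Wrap values into IValues, then unpack them with the matching toX() calls.
   Tensor t = Tensor.fromBlob(new float[] {1f, 2f, 3f, 4f}, new long[] {2, 2});
   IValue input = IValue.from(t);  // IValue of type Tensor
   IValue pair = IValue.tupleFrom(IValue.from(42L), IValue.from("label"));

   Tensor unpacked = input.toTensor();    // throws IllegalStateException for non-Tensor IValues
   long first = pair.toTuple()[0].toLong();
   String second = pair.toTuple()[1].toStr();  // note: toStr(), not toString()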
@ -1,55 +0,0 @@

.. java:import:: com.facebook.jni HybridData

Module
======

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public class Module

   Java wrapper for torch::jit::script::Module.

Methods
-------
destroy
^^^^^^^

.. java:method:: public void destroy()
   :outertype: Module

   Explicitly destroys the native torch::jit::script::Module. Calling this method is not required, as the native object will be destroyed when this object is garbage-collected. However, the timing of garbage collection is not guaranteed, so proactively calling \ ``destroy``\  can free memory more quickly. See \ :java:ref:`com.facebook.jni.HybridData.resetNative`\ .

forward
^^^^^^^

.. java:method:: public IValue forward(IValue... inputs)
   :outertype: Module

   Runs the 'forward' method of this module with the specified arguments.

   :param inputs: arguments for the TorchScript module's 'forward' method.
   :return: return value from the 'forward' method.

load
^^^^

.. java:method:: public static Module load(String modelPath)
   :outertype: Module

   Loads a serialized TorchScript module from the specified path on the disk.

   :param modelPath: path to file that contains the serialized TorchScript module.
   :return: new \ :java:ref:`org.pytorch.Module`\  object which owns torch::jit::script::Module.

runMethod
^^^^^^^^^

.. java:method:: public IValue runMethod(String methodName, IValue... inputs)
   :outertype: Module

   Runs the specified method of this module with the specified arguments.

   :param methodName: name of the TorchScript method to run.
   :param inputs: arguments that will be passed to the TorchScript method.
   :return: return value from the method.
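
A minimal end-to-end sketch of the API above; the model path and input shape are placeholders:

.. code-block:: java

   // Load a serialized TorchScript module and run its 'forward' method.
   Module module = Module.load("/data/local/tmp/model.pt");  // placeholder path
   Tensor input = Tensor.fromBlob(new float[] {1f, 2f, 3f}, new long[] {1, 3});
   IValue output = module.forward(IValue.from(input));
   float[] scores = output.toTensor().getDataAsFloatArray();
   module.destroy();  // optional: release native memory eagerly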
@ -1,60 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_float32
=====================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_float32 extends Tensor
   :outertype: Tensor

Constructors
------------
Tensor_float32
^^^^^^^^^^^^^^

.. java:constructor::  Tensor_float32(FloatBuffer data, long[] shape)
   :outertype: Tensor.Tensor_float32

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_float32

getDataAsFloatArray
^^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public float[] getDataAsFloatArray()
   :outertype: Tensor.Tensor_float32

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override  Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_float32

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_float32
@ -1,52 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_float64
=====================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_float64 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_float64

getDataAsDoubleArray
^^^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public double[] getDataAsDoubleArray()
   :outertype: Tensor.Tensor_float64

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override  Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_float64

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_float64
@ -1,52 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_int32
===================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_int32 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_int32

getDataAsIntArray
^^^^^^^^^^^^^^^^^

.. java:method:: @Override public int[] getDataAsIntArray()
   :outertype: Tensor.Tensor_int32

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override  Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_int32

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_int32
@ -1,52 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_int64
===================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_int64 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_int64

getDataAsLongArray
^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public long[] getDataAsLongArray()
   :outertype: Tensor.Tensor_int64

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override  Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_int64

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_int64
@ -1,52 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_int8
==================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_int8 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_int8

getDataAsByteArray
^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public byte[] getDataAsByteArray()
   :outertype: Tensor.Tensor_int8

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override  Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_int8

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_int8
@ -1,52 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_uint8
===================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_uint8 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_uint8

getDataAsUnsignedByteArray
^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public byte[] getDataAsUnsignedByteArray()
   :outertype: Tensor.Tensor_uint8

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override  Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_uint8

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_uint8
@ -1,315 +0,0 @@

.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor
======

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public abstract class Tensor

   Representation of a Tensor. Behavior is similar to PyTorch's tensor objects.

   Most tensors will be constructed as \ ``Tensor.fromBlob(data, shape)``\ , where \ ``data``\  can be an array or a direct \ :java:ref:`Buffer`\  (of the proper subclass). Helper methods are provided to allocate buffers properly.

   To access Tensor data, see \ :java:ref:`dtype()`\ , \ :java:ref:`shape()`\ , and the various \ ``getDataAs*``\  methods.

   When constructing \ ``Tensor``\  objects with \ ``data``\  as an array, it is not specified whether this data is copied or retained as a reference, so it is recommended not to modify it after constructing the tensor. \ ``data``\  passed as a \ :java:ref:`Buffer`\  is not copied, so it can be modified between \ :java:ref:`Module`\  calls to avoid reallocation. Data retrieved from \ ``Tensor``\  objects may be copied or may be a reference to the \ ``Tensor``\ 's internal data buffer. \ ``shape``\  is always copied.

Methods
-------
allocateByteBuffer
^^^^^^^^^^^^^^^^^^

.. java:method:: public static ByteBuffer allocateByteBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.ByteBuffer`\  with native byte order and the specified capacity, for use in \ :java:ref:`Tensor.fromBlob(ByteBuffer,long[])`\  and \ :java:ref:`Tensor.fromBlobUnsigned(ByteBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of the result buffer.

allocateDoubleBuffer
^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static DoubleBuffer allocateDoubleBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.DoubleBuffer`\  with native byte order and the specified capacity, for use in \ :java:ref:`Tensor.fromBlob(DoubleBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of the result buffer.

allocateFloatBuffer
^^^^^^^^^^^^^^^^^^^

.. java:method:: public static FloatBuffer allocateFloatBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.FloatBuffer`\  with native byte order and the specified capacity, for use in \ :java:ref:`Tensor.fromBlob(FloatBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of the result buffer.

allocateIntBuffer
^^^^^^^^^^^^^^^^^

.. java:method:: public static IntBuffer allocateIntBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.IntBuffer`\  with native byte order and the specified capacity, for use in \ :java:ref:`Tensor.fromBlob(IntBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of the result buffer.

allocateLongBuffer
^^^^^^^^^^^^^^^^^^

.. java:method:: public static LongBuffer allocateLongBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.LongBuffer`\  with native byte order and the specified capacity, for use in \ :java:ref:`Tensor.fromBlob(LongBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of the result buffer.

dtype
^^^^^

.. java:method:: public abstract DType dtype()
   :outertype: Tensor

   :return: data type of this tensor.

dtypeJniCode
^^^^^^^^^^^^

.. java:method::  int dtypeJniCode()
   :outertype: Tensor

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(byte[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int8 and the specified shape, with data given as an array of bytes.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(int[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int32 and the specified shape, with data given as an array of ints.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(float[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float32 and the specified shape, with data given as an array of floats.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(long[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int64 and the specified shape, with data given as an array of longs.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(long[] shape, double[] data)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float64 and the specified shape, with data given as an array of doubles.

   :param shape: Tensor shape
   :param data: Tensor elements

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(ByteBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int8 and the specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(IntBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int32 and the specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(FloatBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float32 and the specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(LongBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int64 and the specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(DoubleBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float64 and the specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlobUnsigned
^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor fromBlobUnsigned(byte[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.uint8 and the specified shape, with data given as an array of bytes.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlobUnsigned
^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor fromBlobUnsigned(ByteBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.uint8 and the specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

getDataAsByteArray
^^^^^^^^^^^^^^^^^^

.. java:method:: public byte[] getDataAsByteArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-int8 tensor.
   :return: a Java byte array that contains the tensor data. This may be a copy or reference.

getDataAsDoubleArray
^^^^^^^^^^^^^^^^^^^^

.. java:method:: public double[] getDataAsDoubleArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-float64 tensor.
   :return: a Java double array that contains the tensor data. This may be a copy or reference.

getDataAsFloatArray
^^^^^^^^^^^^^^^^^^^

.. java:method:: public float[] getDataAsFloatArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-float32 tensor.
   :return: a Java float array that contains the tensor data. This may be a copy or reference.

getDataAsIntArray
^^^^^^^^^^^^^^^^^

.. java:method:: public int[] getDataAsIntArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-int32 tensor.
   :return: a Java int array that contains the tensor data. This may be a copy or reference.

getDataAsLongArray
^^^^^^^^^^^^^^^^^^

.. java:method:: public long[] getDataAsLongArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-int64 tensor.
   :return: a Java long array that contains the tensor data. This may be a copy or reference.

getDataAsUnsignedByteArray
^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public byte[] getDataAsUnsignedByteArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-uint8 tensor.
   :return: a Java byte array that contains the tensor data. This may be a copy or reference.

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method::  Buffer getRawDataBuffer()
   :outertype: Tensor

numel
^^^^^

.. java:method:: public long numel()
   :outertype: Tensor

   Returns the number of elements in this tensor.

numel
^^^^^

.. java:method:: public static long numel(long[] shape)
   :outertype: Tensor

   Calculates the number of elements in a tensor with the specified shape.

shape
^^^^^

.. java:method:: public long[] shape()
   :outertype: Tensor

   Returns the shape of this tensor. (The array is a fresh copy.)
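
A sketch contrasting the two construction paths described above: one-off creation from an array (copy semantics unspecified) versus a reusable direct buffer:

.. code-block:: java

   // One-off construction from an array; do not modify `data` afterwards.
   Tensor a = Tensor.fromBlob(new int[] {1, 2, 3, 4, 5, 6}, new long[] {2, 3});

   // Reusable construction: a direct FloatBuffer can be refilled between
   // Module calls without reallocating tensor storage.
   long[] shape = new long[] {1, 3};
   java.nio.FloatBuffer buffer = Tensor.allocateFloatBuffer((int) Tensor.numel(shape));
   buffer.put(new float[] {0.1f, 0.2f, 0.3f});
   Tensor b = Tensor.fromBlob(buffer, shape);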
@ -1,114 +0,0 @@

.. java:import:: android.graphics Bitmap

.. java:import:: android.graphics ImageFormat

.. java:import:: android.media Image

.. java:import:: org.pytorch Tensor

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.util Locale

TensorImageUtils
================

.. java:package:: org.pytorch.torchvision
   :noindex:

.. java:type:: public final class TensorImageUtils

   Contains utility functions for \ :java:ref:`org.pytorch.Tensor`\  creation from an \ :java:ref:`android.graphics.Bitmap`\  or \ :java:ref:`android.media.Image`\  source.

Fields
------
TORCHVISION_NORM_MEAN_RGB
^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:field:: public static float[] TORCHVISION_NORM_MEAN_RGB
   :outertype: TensorImageUtils

TORCHVISION_NORM_STD_RGB
^^^^^^^^^^^^^^^^^^^^^^^^

.. java:field:: public static float[] TORCHVISION_NORM_STD_RGB
   :outertype: TensorImageUtils

Methods
-------
bitmapToFloat32Tensor
^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor bitmapToFloat32Tensor(Bitmap bitmap, float[] normMeanRGB, float[] normStdRGB)
   :outertype: TensorImageUtils

   Creates a new \ :java:ref:`org.pytorch.Tensor`\  from a full \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std given in the parameters.

   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

bitmapToFloat32Tensor
^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor bitmapToFloat32Tensor(Bitmap bitmap, int x, int y, int width, int height, float[] normMeanRGB, float[] normStdRGB)
   :outertype: TensorImageUtils

   Creates a new \ :java:ref:`org.pytorch.Tensor`\  from the specified area of an \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std given in the parameters.

   :param bitmap: \ :java:ref:`android.graphics.Bitmap`\  as a source for Tensor data
   :param x: x coordinate of the top left corner of the bitmap area
   :param y: y coordinate of the top left corner of the bitmap area
   :param width: width of the bitmap area
   :param height: height of the bitmap area
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

bitmapToFloatBuffer
^^^^^^^^^^^^^^^^^^^

.. java:method:: public static void bitmapToFloatBuffer(Bitmap bitmap, int x, int y, int width, int height, float[] normMeanRGB, float[] normStdRGB, FloatBuffer outBuffer, int outBufferOffset)
   :outertype: TensorImageUtils

   Writes tensor content from the specified \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std given in the parameters, to the specified \ :java:ref:`java.nio.FloatBuffer`\  at the specified offset.

   :param bitmap: \ :java:ref:`android.graphics.Bitmap`\  as a source for Tensor data
   :param x: x coordinate of the top left corner of the bitmap area
   :param y: y coordinate of the top left corner of the bitmap area
   :param width: width of the bitmap area
   :param height: height of the bitmap area
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

imageYUV420CenterCropToFloat32Tensor
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor imageYUV420CenterCropToFloat32Tensor(Image image, int rotateCWDegrees, int tensorWidth, int tensorHeight, float[] normMeanRGB, float[] normStdRGB)
   :outertype: TensorImageUtils

   Creates a new \ :java:ref:`org.pytorch.Tensor`\  from the specified area of an \ :java:ref:`android.media.Image`\ , doing optional rotation, scaling (nearest) and center cropping.

   :param image: \ :java:ref:`android.media.Image`\  as a source for Tensor data
   :param rotateCWDegrees: clockwise angle through which the input image needs to be rotated to be upright. Valid values: 0, 90, 180, 270
   :param tensorWidth: return tensor width, must be positive
   :param tensorHeight: return tensor height, must be positive
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

imageYUV420CenterCropToFloatBuffer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static void imageYUV420CenterCropToFloatBuffer(Image image, int rotateCWDegrees, int tensorWidth, int tensorHeight, float[] normMeanRGB, float[] normStdRGB, FloatBuffer outBuffer, int outBufferOffset)
   :outertype: TensorImageUtils

   Writes tensor content from the specified \ :java:ref:`android.media.Image`\ , doing optional rotation, scaling (nearest) and center cropping, to the specified \ :java:ref:`java.nio.FloatBuffer`\  at the specified offset.

   :param image: \ :java:ref:`android.media.Image`\  as a source for Tensor data
   :param rotateCWDegrees: clockwise angle through which the input image needs to be rotated to be upright. Valid values: 0, 90, 180, 270
   :param tensorWidth: return tensor width, must be positive
   :param tensorHeight: return tensor height, must be positive
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order
   :param outBuffer: output buffer, where tensor content will be written
   :param outBufferOffset: output buffer offset at which tensor content will be written
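
A typical Android inference sketch assembled from the utilities above; the ``bitmap`` and ``module`` objects are assumed to exist already:

.. code-block:: java

   // Convert a Bitmap into a normalized float32 tensor and run inference.
   Tensor inputTensor = TensorImageUtils.bitmapToFloat32Tensor(
       bitmap,
       TensorImageUtils.TORCHVISION_NORM_MEAN_RGB,
       TensorImageUtils.TORCHVISION_NORM_STD_RGB);
   float[] scores =
       module.forward(IValue.from(inputTensor)).toTensor().getDataAsFloatArray();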
@ -1,18 +0,0 @@

org.pytorch
===========

.. java:package:: org.pytorch

.. toctree::
   :maxdepth: 1

   DType
   IValue
   Module
   Tensor
   Tensor-Tensor_float32
   Tensor-Tensor_float64
   Tensor-Tensor_int32
   Tensor-Tensor_int64
   Tensor-Tensor_int8
   Tensor-Tensor_uint8
@ -1,9 +0,0 @@

org.pytorch.torchvision
=======================

.. java:package:: org.pytorch.torchvision

.. toctree::
   :maxdepth: 1

   TensorImageUtils
@ -1,7 +0,0 @@

Javadoc
=======

.. toctree::
   :maxdepth: 2

   org/pytorch/package-index
@ -42,6 +42,27 @@ The corresponding implementation is chosen automatically based on the PyTorch build mode.

  Quantization-aware training (through :class:`~torch.quantization.FakeQuantize`) supports both CPU and CUDA.


.. note::

   When preparing a quantized model, it is necessary to ensure that the qconfig and the engine used for quantized computations match
   the backend on which the model will be executed. Quantization currently supports two backends: fbgemm (for use on x86,
   `<https://github.com/pytorch/FBGEMM>`_) and qnnpack (for use on ARM, via the QNNPACK library `<https://github.com/pytorch/QNNPACK>`_).
   For example, if you are interested in quantizing a model to run on ARM, it is recommended to set the qconfig by calling:

   ``qconfig = torch.quantization.get_default_qconfig('qnnpack')``

   for post-training quantization and

   ``qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')``

   for quantization-aware training.

   In addition, the ``torch.backends.quantized.engine`` parameter should be set to match the backend. To use qnnpack for inference,
   set the backend to qnnpack as follows:

   ``torch.backends.quantized.engine = 'qnnpack'``
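
Putting the note together, here is a minimal sketch of pointing a model at the qnnpack backend; the ``model`` variable is a placeholder for an arbitrary float module:

.. code-block:: python

   import torch

   model = ...  # placeholder: the float torch.nn.Module to be quantized

   # The qconfig must match the backend the model will run on (qnnpack for ARM).
   model.qconfig = torch.quantization.get_default_qconfig('qnnpack')

   # The engine used for quantized computations must match the qconfig backend.
   torch.backends.quantized.engine = 'qnnpack'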
Quantized Tensors
-----------------

@ -111,7 +132,7 @@ Operations that are available from the ``torch`` namespace or as methods on Tensors

* :func:`~torch.quantize_per_tensor` - Convert float tensor to quantized tensor with per-tensor scale and zero point
* :func:`~torch.quantize_per_channel` - Convert float tensor to quantized tensor with per-channel scale and zero point
* View-based operations like :meth:`~torch.Tensor.view`, :meth:`~torch.Tensor.as_strided`, :meth:`~torch.Tensor.expand`, :meth:`~torch.Tensor.flatten`, :meth:`~torch.Tensor.slice`, python-style indexing, etc - work as on regular tensor (if quantization is not per-channel)
* View-based operations like :meth:`~torch.Tensor.view`, :meth:`~torch.Tensor.as_strided`, :meth:`~torch.Tensor.expand`, :meth:`~torch.Tensor.flatten`, :meth:`~torch.Tensor.select`, python-style indexing, etc - work as on regular tensor (if quantization is not per-channel)
* Comparators
    * :meth:`~torch.Tensor.ne` — Not equal
    * :meth:`~torch.Tensor.eq` — Equal
@ -132,12 +153,24 @@ Operations that are available from the ``torch`` namespace or as methods on Tensors
* :meth:`~torch.Tensor.q_per_channel_scales` — Returns the scales of the per-channel quantized tensor
* :meth:`~torch.Tensor.q_per_channel_zero_points` — Returns the zero points of the per-channel quantized tensor
* :meth:`~torch.Tensor.q_per_channel_axis` — Returns the channel axis of the per-channel quantized tensor
* :meth:`~torch.Tensor.relu` — Rectified linear unit (copy)
* :meth:`~torch.Tensor.relu_` — Rectified linear unit (in-place)
* :meth:`~torch.Tensor.resize_` — In-place resize
* :meth:`~torch.Tensor.sort` — Sorts the tensor
* :meth:`~torch.Tensor.topk` — Returns the k largest values of a tensor

``torch.nn.functional``
~~~~~~~~~~~~~~~~~~~~~~~

Basic activations are supported; a short usage sketch follows this list.

* :meth:`~torch.nn.functional.relu` — Rectified linear unit (copy)
* :meth:`~torch.nn.functional.relu_` — Rectified linear unit (in-place)
* :meth:`~torch.nn.functional.max_pool2d` - Maximum pooling
* :meth:`~torch.nn.functional.adaptive_avg_pool2d` - Adaptive average pooling
* :meth:`~torch.nn.functional.avg_pool2d` - Average pooling
* :meth:`~torch.nn.functional.interpolate` - Interpolation
* :meth:`~torch.nn.functional.upsample` - Upsampling
* :meth:`~torch.nn.functional.upsample_bilinear` - Bilinear upsampling
* :meth:`~torch.nn.functional.upsample_nearest` - Nearest-neighbor upsampling

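A small sketch of the tensor-level entry points listed above (scale, zero point, and values are illustrative):

.. code-block:: python

   import torch

   x = torch.tensor([-1.0, 0.0, 1.0, 2.0])

   # Per-tensor quantization: one scale and zero point for the whole tensor.
   q = torch.quantize_per_tensor(x, scale=0.1, zero_point=10, dtype=torch.quint8)

   # The listed functional ops accept quantized tensors directly.
   y = torch.nn.functional.relu(q)

   print(q.q_scale(), q.q_zero_point())  # 0.1 10
   print(y.dequantize())                 # approximately relu of the original values
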
``torch.nn.intrinsic``
~~~~~~~~~~~~~~~~~~~~~~
@ -432,7 +465,7 @@ Debugging utilities

.. autofunction:: get_observer_dict
.. autoclass:: RecordingObserver

torch.nn.instrinsic
torch.nn.intrinsic
--------------------------------

This module implements the combined (fused) modules conv + relu, which can then be quantized.
@ -546,6 +579,13 @@ Functional interface

.. autofunction:: conv2d
.. autofunction:: conv3d
.. autofunction:: max_pool2d
.. autofunction:: adaptive_avg_pool2d
.. autofunction:: avg_pool2d
.. autofunction:: interpolate
.. autofunction:: upsample
.. autofunction:: upsample_bilinear
.. autofunction:: upsample_nearest


.. automodule:: torch.nn.quantized

@ -55,7 +55,7 @@ This library provides primitives allowing users to create and modify references

.. autofunction:: rpc_async
.. autofunction:: remote
.. autofunction:: get_worker_info
.. autofunction:: wait_all_workers
.. autofunction:: shutdown

Distributed Autograd Framework
------------------------------

@ -1,131 +0,0 @@

/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "batch_permutation_op.h"
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

namespace caffe2 {

#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
    BatchPermutation,
    IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
#endif

REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(BatchPermutation)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Permute the batch elements of the input tensor X according to the permutation
specified in the input indices.

Warning: this op does not verify that indices is a valid permutation; gradient
computation is only correct if indices is a permutation.
)DOC")
    .Input(
        0,
        "X",
        "Tensor of at least 1D shape (N, D0, D1, ...).")
    .Input(
        1,
        "indices",
        "1D tensor of type int with shape (N, ) specifying a valid permutation "
        "of the indices in [0, N - 1] (inclusive).")
    .Output(
        0,
        "Y",
        "Tensor with the same shape as X where the (D0, D1, ...) dimensional "
        "batch elements of X are permuted according to the input indices.");

OPERATOR_SCHEMA(BatchPermutationGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .Input(
        0,
        "indices",
        "See BatchPermutation.")
    .Input(
        1,
        "dY",
        "Gradient of forward output 0 (Y).")
    .Output(
        0,
        "dX",
        "Gradient of forward input 0 (X).");

template <>
bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
  const auto& X = Input(0);
  const auto& indices = Input(1);

  CAFFE_ENFORCE_EQ(indices.dim(), 1, "indices must be 1-d");
  CAFFE_ENFORCE_EQ(
    X.dim32(0), indices.dim32(0),
    "X.dim32(0) must be equal to indices.dim32(0)",
    "(",
    X.dim32(0),
    " vs. ",
    indices.dim32(0),
    ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  const int N = X.dim32(0);
  const int C = X.dim32(1);
  const int H = X.dim32(2);
  const int W = X.dim32(3);

  const float *src = X.template data<float>();
  float *dst = Y->template mutable_data<float>();

#ifdef _OPENMP
#if (_OPENMP >= 201307)
#pragma omp parallel for simd
#else
#pragma omp parallel for
#endif
#endif
  for (int i = 0; i < N; i++) {
    int idx = indices.template data<int>()[i];

    std::memcpy(dst + i * C * H * W, src + idx * C * H * W, sizeof(float) * C * H * W);
  }

  return true;
}

class GetBatchPermutationGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "BatchPermutationGradient",
        "",
        vector<string>{I(1), GO(0)},
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient);

} // namespace caffe2
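
For context, a sketch of exercising this operator through the Caffe2 Python workspace API (blob names are arbitrary, and the 4-D shape matches the CPU implementation above):

.. code-block:: python

   import numpy as np
   from caffe2.python import core, workspace

   # Y[i] = X[indices[i]]: permute the batch dimension of a 4-D blob.
   workspace.FeedBlob("X", np.random.randn(4, 3, 2, 2).astype(np.float32))
   workspace.FeedBlob("indices", np.array([2, 0, 3, 1], dtype=np.int32))
   workspace.RunOperatorOnce(
       core.CreateOperator("BatchPermutation", ["X", "indices"], ["Y"]))
   Y = workspace.FetchBlob("Y")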
@@ -1,112 +0,0 @@
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "modules/detectron/batch_permutation_op.h"
#include "caffe2/core/context_gpu.h"

namespace caffe2 {

namespace {
template <bool forward>
__global__ void BatchPermutationKernel(
    int N,
    int C,
    int H,
    int W,
    const float* src,
    const int* indices,
    float* dst) {
  CUDA_1D_KERNEL_LOOP(index, N * C * H * W) {
    int w = index % W;
    int h = (index / W) % H;
    int c = (index / W / H) % C;
    int n = (index / W / H / C);
    int idx = indices[n];
    if (forward) {
      dst[n * C * H * W + c * H * W + h * W + w] =
          src[idx * C * H * W + c * H * W + h * W + w];
    } else {
      dst[idx * C * H * W + c * H * W + h * W + w] =
          src[n * C * H * W + c * H * W + h * W + w];
    }
  }
}
} // namespace

template <>
bool BatchPermutationOp<float, CUDAContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& indices = Input(1);

  CAFFE_ENFORCE(indices.ndim() == 1, "indices must be 1-d");
  CAFFE_ENFORCE(
      X.dim32(0) == indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  BatchPermutationKernel<true><<<
      CAFFE_GET_BLOCKS(X.size()),
      CAFFE_CUDA_NUM_THREADS,
      0,
      context_.cuda_stream()>>>(
      X.dim32(0),
      X.dim32(1),
      X.dim32(2),
      X.dim32(3),
      X.data<float>(),
      indices.data<int>(),
      Y->mutable_data<float>());

  return true;
}

template <>
bool BatchPermutationGradientOp<float, CUDAContext>::RunOnDevice() {
  auto& indices = Input(0);
  auto& dY = Input(1);

  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

  BatchPermutationKernel<false><<<
      CAFFE_GET_BLOCKS(dY.size()),
      CAFFE_CUDA_NUM_THREADS,
      0,
      context_.cuda_stream()>>>(
      dY.dim32(0),
      dY.dim32(1),
      dY.dim32(2),
      dY.dim32(3),
      dY.data<float>(),
      indices.data<int>(),
      dX->mutable_data<float>());

  return true;
}

REGISTER_CUDA_OPERATOR(
    BatchPermutation,
    BatchPermutationOp<float, CUDAContext>);
REGISTER_CUDA_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CUDAContext>);
} // namespace caffe2
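A quick sanity check (my sketch, not from the diff) that the kernel's flat-offset decomposition matches row-major NCHW order:

import numpy as np

N, C, H, W = 2, 3, 4, 5
for index in range(N * C * H * W):
    w = index % W
    h = (index // W) % H
    c = (index // (W * H)) % C
    n = index // (W * H * C)
    # numpy's unravel_index uses the same row-major convention
    assert (n, c, h, w) == np.unravel_index(index, (N, C, H, W))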
@@ -1,53 +0,0 @@
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BATCHPERMUTATION_OP_H_
#define BATCHPERMUTATION_OP_H_

#include <cstring>
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <typename T, class Context>
class BatchPermutationOp final : public Operator<Context> {
 public:
  BatchPermutationOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override;
};

template <typename T, class Context>
class BatchPermutationGradientOp final : public Operator<Context> {
 public:
  BatchPermutationGradientOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    // No CPU implementation for now
    CAFFE_NOT_IMPLEMENTED;
  }
};

} // namespace caffe2

#endif // BATCHPERMUTATION_OP_H_
@@ -17,17 +17,23 @@ from torch._C import parse_schema
#
# Whitelist entries can be removed after the date listed on them passes.
white_list = [
    ('c10_experimental', datetime.date(2020, 1, 1)),
    ('_batch_norm_impl_index', datetime.date(2019, 11, 15)),
    ('_batch_norm_impl_index_backward', datetime.date(2019, 11, 15)),
    ('cudnn_batch_norm', datetime.date(2019, 11, 15)),
    ('cudnn_batch_norm_backward', datetime.date(2019, 11, 15)),
    ('_nnpack_spatial_convolution', datetime.date(2019, 11, 12)),
    ('_aten', datetime.date(2019, 12, 22)),
    ('_prim::ListConstruct', datetime.date(2019, 11, 22)),
    ('thnn_conv3d', datetime.date(9999, 1, 1)),
    ('thnn_conv3d.out', datetime.date(9999, 1, 1)),
    ('grad', datetime.date(2020, 1, 1)),
    ("aten::append", datetime.date(9999, 1, 1)),
    ("prim::AutogradAnyNonZero", datetime.date(9999, 1, 1)),
    ("aten::grad", datetime.date(9999, 1, 1)),
    ("_c10_experimental", datetime.date(9999, 1, 1)),
    ("aten::thnn_conv3d", datetime.date(9999, 1, 1)),
    ("aten::native_layer_norm_double_backward", datetime.date(9999, 1, 1)),
    ("aten::cudnn_batch_norm", datetime.date(9999, 1, 1)),
    ("aten::cudnn_batch_norm_backward", datetime.date(9999, 1, 1)),
    ("aten::_batch_norm_impl_index_backward", datetime.date(9999, 1, 1)),
    ("aten::empty_like", datetime.date(9999, 1, 1)),
    ("aten::_batch_norm_impl_index", datetime.date(9999, 1, 1)),
    ("aten::index_fill_", datetime.date(9999, 1, 1)),
    ("aten::index_fill", datetime.date(9999, 1, 1)),
    ("aten::log_softmax", datetime.date(9999, 1, 1)),
    ("aten::softmax", datetime.date(9999, 1, 1)),
    ("aten::thnn_conv3d_forward", datetime.date(9999, 1, 1)),
    ("aten::thnn_conv3d_backward.output_mask", datetime.date(9999, 1, 1)),
]


@@ -43,6 +49,8 @@ def white_listed(schema, white_list):

def check_bc(new_schema_dict):
    existing_schemas = torch._C._jit_get_all_schemas()
    is_bc = True
    broken_ops = []
    for existing_schema in existing_schemas:
        if white_listed(existing_schema, white_list):
            print("skipping schema: ", str(existing_schema))
@@ -60,13 +68,17 @@ def check_bc(new_schema_dict):
                  .format(
                      str(existing_schema),
                      "\n\t".join(str(s) for s in new_schemas)))
            print('The PR is introducing backward incompatible changes to the '
                  'operator library. Please contact PyTorch team to confirm '
                  'whether this change is wanted or not.')
            # TODO Print out more details about why candidates don't match.
            return False
    print('Found backward compatible schemas for all existing schemas')
    return True
            broken_ops.append(str(existing_schema))
            is_bc = False
    if is_bc:
        print('Found backward compatible schemas for all existing schemas')
    else:
        print('The PR is introducing backward incompatible changes to the '
              'operator library. Please contact PyTorch team to confirm '
              'whether this change is wanted or not. \n Broken ops: [\n{}]'
              .format("\n".join(broken_ops)))
    return is_bc


if __name__ == '__main__':

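For context, a minimal sketch of the gate these entries feed. The real white_listed is defined just above the second hunk and is not shown here, so the body below is an assumption about its shape, not the actual implementation:

import datetime

def _white_listed_sketch(schema_name, white_list, today=None):
    # Assumed behavior: an entry suppresses the BC check for matching
    # schemas until its expiry date passes; (9999, 1, 1) never expires.
    today = today or datetime.date.today()
    return any(prefix in schema_name and today < expiry
               for prefix, expiry in white_list)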
@@ -903,6 +903,15 @@ class TestCase(expecttest.TestCase):
        # Don't put this in the try block; the AssertionError will catch it
        self.fail(msg="Did not raise when expected to")

    def assertNotWarn(self, callable, msg=''):
        r"""
        Test if :attr:`callable` does not raise a warning.
        """
        with self._reset_warning_registry(), warnings.catch_warnings(record=True) as ws:
            warnings.simplefilter("always")  # allow any warning to be raised
            callable()
            self.assertTrue(len(ws) == 0, msg)

    def assertWarns(self, callable, msg=''):
        r"""
        Test if :attr:`callable` raises a warning.
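A hedged usage sketch of the new assertNotWarn helper; the two functions below are hypothetical, not from the diff:

import warnings

def quiet():
    return 1 + 1          # emits no warning

def noisy():
    warnings.warn("deprecated")

# Inside a TestCase-derived test one would then write:
#     self.assertNotWarn(quiet)            # passes: nothing recorded
#     self.assertNotWarn(noisy, "warned")  # fails with message "warned"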
@@ -145,7 +145,7 @@ TEST_F(ModuleTest, RegisterParameterUndefinedTensor) {
  {
    TestModel model;
    model.register_parameter("undefined_tensor", torch::Tensor(), /*requires_grad=*/false);
    ASSERT_FALSE(model.named_parameters()["undefined_tensor"].defined());
    ASSERT_EQ(model.parameters().size(), 0);
  }
  {
    std::stringstream buffer;
@@ -153,7 +153,7 @@ TEST_F(ModuleTest, RegisterParameterUndefinedTensor) {

    TestModel model;
    model.register_parameter("undefined_tensor", torch::Tensor());
    ASSERT_FALSE(model.named_parameters()["undefined_tensor"].defined());
    ASSERT_EQ(model.parameters().size(), 0);

    ASSERT_EQ(
      count_substr_occurrences(
@@ -221,6 +221,87 @@ TEST_F(ModuleTest, AsCastsModulesCorrectly) {
  ASSERT_EQ(unit.as<AGIUnit>(), &unit);
}

void test_DeviceOrDtypeConversionSkipsUndefinedTensor(
  torch::Device to_device, torch::Dtype to_dtype) {
  {
    // Case 1: Undefined tensors as parameters
    Linear module(LinearOptions(10, 20).bias(false));
    ASSERT_TRUE(module->weight.defined());
    ASSERT_FALSE(module->bias.defined());

    module->to(to_device);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.device().type(), to_device.type());
    ASSERT_FALSE(module->bias.defined());

    module->to(to_dtype);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.dtype(), to_dtype);
    ASSERT_FALSE(module->bias.defined());
  }
  {
    // Case 2: Undefined tensors as buffers
    BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(false).affine(true));
    ASSERT_TRUE(module->weight.defined());
    ASSERT_FALSE(module->running_mean.defined());

    module->to(to_device);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.device().type(), to_device.type());
    ASSERT_FALSE(module->running_mean.defined());

    module->to(to_dtype);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.dtype(), to_dtype);
    ASSERT_FALSE(module->running_mean.defined());
  }
}

TEST_F(ModuleTest, DeviceOrDtypeConversionSkipsUndefinedTensor) {
  test_DeviceOrDtypeConversionSkipsUndefinedTensor(torch::kCPU, torch::kDouble);
}

TEST_F(ModuleTest, DeviceOrDtypeConversionSkipsUndefinedTensor_CUDA) {
  test_DeviceOrDtypeConversionSkipsUndefinedTensor(torch::kCUDA, torch::kDouble);
}

TEST_F(ModuleTest, ParametersAndBuffersAccessorSkipsUndefinedTensor) {
  {
    Linear module(LinearOptions(10, 20).bias(false));

    auto params = module->parameters();
    ASSERT_EQ(params.size(), 1);
    auto named_params = module->named_parameters();
    ASSERT_EQ(named_params.size(), 1);

    ASSERT_TRUE(pointer_equal(params[0], named_params["weight"]));
    ASSERT_TRUE(pointer_equal(named_params["weight"], module->weight));
  }
  {
    BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(false).affine(false));

    auto buffers = module->buffers();
    ASSERT_EQ(buffers.size(), 0);
    auto named_buffers = module->named_buffers();
    ASSERT_EQ(named_buffers.size(), 0);
  }
  {
    BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(true).affine(false));

    auto buffers = module->buffers();
    ASSERT_EQ(buffers.size(), 3);
    auto named_buffers = module->named_buffers();
    ASSERT_EQ(named_buffers.size(), 3);

    ASSERT_TRUE(pointer_equal(buffers[0], named_buffers["running_mean"]));
    ASSERT_TRUE(pointer_equal(named_buffers["running_mean"], module->running_mean));
    ASSERT_TRUE(pointer_equal(buffers[1], named_buffers["running_var"]));
    ASSERT_TRUE(pointer_equal(named_buffers["running_var"], module->running_var));
    ASSERT_TRUE(pointer_equal(buffers[2], named_buffers["num_batches_tracked"]));
    ASSERT_TRUE(pointer_equal(named_buffers["num_batches_tracked"], module->num_batches_tracked));
  }
}

TEST_F(ModuleTest, Conversion_MultiCUDA) {
  Linear module(128, 64);
  for (auto& parameter : module->parameters()) {
@@ -46,7 +46,7 @@ private:
};

inline bool pointer_equal(at::Tensor first, at::Tensor second) {
  return first.data_ptr<float>() == second.data_ptr<float>();
  return first.data_ptr() == second.data_ptr();
}

inline int count_substr_occurrences(const std::string& str, const std::string& substr) {
@@ -1360,7 +1360,7 @@ class DistAutogradTest(RpcAgentTestFixture):
        # receive gradients from the node that received an error (and as a
        # result it didn't execute the rest of the graph).
        dist.barrier()
        rpc.wait_all_workers()
        rpc.shutdown()
        sys.exit(0)


@@ -72,6 +72,11 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):

    @wraps(old_test_method)
    def new_test_method(self, *arg, **kwargs):
        # Setting _ignore_rref_leak to make sure OwnerRRefs are properly deleted
        # in tests.
        import torch.distributed.rpc.api as api
        api._ignore_rref_leak = False

        self.worker_id = self.rank

        if setup_rpc:
@@ -83,7 +88,6 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):
            rpc.init_rpc(
                name="worker%d" % self.rank,
                backend=self.rpc_backend,
                init_method=self.init_method,
                rank=self.rank,
                world_size=self.world_size,
                rpc_backend_options=self.rpc_backend_options,
@@ -123,7 +127,7 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):
            # since we need to shutdown the RPC agent. If we don't shutdown the
            # RPC agent, tests would fail since RPC agent threads, locks and
            # condition variables are not properly terminated.
            rpc.wait_all_workers()
            rpc.shutdown()

        return return_value

@@ -134,6 +138,7 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):
TEST_CONFIG.rpc_backend_name = "PROCESS_GROUP"
TEST_CONFIG.build_rpc_backend_options = lambda test_object: rpc.backend_registry.construct_rpc_backend_options(
    test_object.rpc_backend,
    init_method=test_object.init_method,
    # Use enough 'num_send_recv_threads' until we fix https://github.com/pytorch/pytorch/issues/26359
    num_send_recv_threads=16,
)

@@ -6,7 +6,7 @@ import hypothesis
from hypothesis import assume
from hypothesis import strategies as st
from hypothesis.extra import numpy as stnp
from hypothesis.searchstrategy import SearchStrategy
from hypothesis.strategies import SearchStrategy

from common_quantized import _calculate_dynamic_qparams, _calculate_dynamic_per_channel_qparams

@@ -304,10 +304,11 @@ def tensor_conv(

    return X, W, b, groups

# Disable deadline testing if this version of hypothesis supports it, otherwise
# just return the original function
def no_deadline(fn):
    try:
        return hypothesis.settings(deadline=None)(fn)
    except hypothesis.errors.InvalidArgument:
        return fn
from hypothesis import settings
settings.register_profile("no_deadline", deadline=None)
settings.load_profile("no_deadline")

# This is really just to get flake8 to not complain when this file
# is imported purely for the side-effectful stuff above
def assert_deadline_disabled():
    assert settings().deadline is None

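This hunk swaps the per-test no_deadline decorator for a module-wide hypothesis settings profile, so test modules now opt in simply by importing this file for its side effect. A sketch of the consuming pattern, mirroring the import changes made elsewhere in this diff:

import hypothesis_utils as hu
hu.assert_deadline_disabled()  # fails fast if the "no_deadline" profile is not active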
@@ -183,31 +183,6 @@ class TestONNXRuntime(unittest.TestCase):
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_googlenet_quantization(self):
        model = torchvision.models.quantization.googlenet(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_inception_quantization(self):
        model = torchvision.models.quantization.inception_v3(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_mobilenet_quantization(self):
        model = torchvision.models.quantization.mobilenet_v2(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_resnet_quantization(self):
        model = torchvision.models.quantization.resnet50(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,))

    def test_shufflenet_quantization(self):
        model = torchvision.models.quantization.shufflenet_v2_x1_0(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_r3d_18_video(self):
        model = torchvision.models.video.r3d_18(pretrained=True)
        x = torch.randn(1, 3, 4, 112, 112, requires_grad=True)
@@ -238,6 +213,55 @@ class TestONNXRuntime(unittest.TestCase):
        # Only support CPU version, since tracer is not working in GPU RNN.
        self.run_test(model, (x, model.hidden))

    def get_image_from_url(self, url):
        import sys
        import os
        if sys.version_info < (3,):
            from urlparse import urlsplit
            import urllib2
            request = urllib2
        else:
            from urllib.parse import urlsplit
            from urllib import request
        from PIL import Image
        from torchvision import transforms
        from torch._utils_internal import get_writable_path

        filename = os.path.basename(urlsplit(url)[2])
        data_dir = get_writable_path(os.path.join(os.path.dirname(__file__)))
        path = os.path.join(data_dir, filename)
        data = request.urlopen(url, timeout=15).read()
        with open(path, 'wb') as f:
            f.write(data)
        image = Image.open(path).convert("RGB")
        image = image.resize((300, 200), Image.BILINEAR)
        to_tensor = transforms.ToTensor()
        return to_tensor(image)

    def get_test_images(self):
        image_url = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg"
        image = self.get_image_from_url(url=image_url)
        images = [image]
        return images

    @skipIfUnsupportedMinOpsetVersion(11)
    def test_keypoint_rcnn(self):
        class KeyPointRCNN(torch.nn.Module):
            def __init__(self):
                super(KeyPointRCNN, self).__init__()
                self.model = torchvision.models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True,
                                                                                                  min_size=200,
                                                                                                  max_size=300)

            def forward(self, images):
                output = self.model(images)
                # TODO: The keypoints_scores require the use of Argmax that is updated in ONNX.
                #       For now we are testing all the outputs of KeypointRCNN except keypoints_scores.
                #       Enable when Argmax is updated in ONNX Runtime.
                return output[0]['boxes'], output[0]['labels'], output[0]['scores'], output[0]['keypoints']
        images = self.get_test_images()
        self.run_test(KeyPointRCNN(), (images,), rtol=1e-3, atol=1e-5)

    def test_word_language_model_RNN_TANH(self):
        self.run_word_language_model("RNN_TANH")

test/rpc_test.py (148 lines changed)
@@ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals
import concurrent.futures
from datetime import timedelta
import sys
import time
import unittest
from collections import namedtuple
from unittest import mock
@@ -18,6 +19,21 @@ from torch.distributed.rpc.api import _use_rpc_pickler
from torch.distributed.rpc.internal import PythonUDF, _internal_rpc_pickler
from rpc_agent_test_fixture import RpcAgentTestFixture

rpc_done = [False, False, False, False]

# TODO: dedupe this with the code in dist_autograd_test.py.
# Send rpc done info and context_id to
# dst_rank = (self.rank + rank_distance) % self.world_size
# we don't need a lock here since the GIL is held while executing remote
# python UDFs, so access is serialized across several workers.
def _set_rpc_done(rank_distance):
    global rpc_done
    rpc_done[rank_distance] = True

def _check_rpc_done(rank_distance):
    while not rpc_done[rank_distance]:
        # yield control to other threads
        time.sleep(0)

def requires_process_group_agent(message=""):
    def decorator(old_func):
@@ -127,7 +143,6 @@ def my_tensor_function(a, b):
    return a + b

def my_sleep_func(seconds=1):
    import time
    time.sleep(seconds)


@@ -306,7 +321,6 @@ class RpcTest(RpcAgentTestFixture):
        rpc.init_rpc(
            name="worker1",
            backend=backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
@@ -327,14 +341,13 @@ class RpcTest(RpcAgentTestFixture):
                world_size=self.world_size,
                rpc_backend_options=self.rpc_backend_options,
            )
        rpc.wait_all_workers()
        rpc.shutdown()

    @dist_init(setup_rpc=False)
    def test_reinit(self):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
@@ -357,13 +370,13 @@ class RpcTest(RpcAgentTestFixture):
            rpc.init_rpc(
                name="worker{}".format(self.rank),
                backend=self.rpc_backend,
                init_method=self.init_method,
                rank=self.rank,
                world_size=self.world_size,
                rpc_backend_options=self.rpc_backend_options,
            )
        rpc.wait_all_workers()
        rpc.shutdown()

    @unittest.skip("test_invalid_names is flaky, see https://github.com/pytorch/pytorch/issues/25912")
    @dist_init(setup_rpc=False)
    def test_invalid_names(self):
        with self.assertRaisesRegex(RuntimeError, "Worker name must match"):
@@ -430,8 +443,8 @@ class RpcTest(RpcAgentTestFixture):

        from torch.distributed.rpc.api import _agent
        self.assertEqual(_agent, None)
        # wait_all_workers() should not do anything as _agent is None
        rpc.wait_all_workers()
        # shutdown() should not do anything as _agent is None
        rpc.shutdown()
        # We need this barrier here because although init_process_group is
        # blocking, it does not guarantee that all ranks are done with
        # initialization after the call. We did run into issues with it where
@@ -508,12 +521,11 @@ class RpcTest(RpcAgentTestFixture):
            self.assertEqual(ret, torch.ones(n, n) * 2)

    @dist_init(setup_rpc=False)
    def test_wait_all_workers(self):
    def test_shutdown(self):
        # Initialize RPC.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
@@ -527,7 +539,7 @@ class RpcTest(RpcAgentTestFixture):
            args=(torch.ones(n, n), torch.ones(n, n)),
        )
        self.assertEqual(ret, torch.ones(n, n) * 2)
        rpc.wait_all_workers()
        rpc.shutdown()

        with self.assertRaisesRegex(RuntimeError, "^RPC has not been initialized"):
            rpc.rpc_sync(
@@ -536,8 +548,8 @@ class RpcTest(RpcAgentTestFixture):
                args=(torch.ones(n, n), torch.ones(n, n)),
            )

        # it's safe to call wait_all_workers() multiple times
        rpc.wait_all_workers()
        # it's safe to call shutdown() multiple times
        rpc.shutdown()

    @dist_init
    def test_expected_src(self):
@@ -701,8 +713,6 @@ class RpcTest(RpcAgentTestFixture):
        self.assertEqual(ret, torch.ones(2, 2) + 1)

    def _stress_test_rpc(self, f, repeat=1000, args=()):
        import time

        n = self.rank + 1
        dst_rank = n % self.world_size
        futs = []
@@ -1090,6 +1100,111 @@ class RpcTest(RpcAgentTestFixture):

        self.assertEqual(result, sum(vals))

    def _test_rref_leak(self, ignore_leak=False):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )

        # This is for the below `dist.barrier`.
        # For `RpcAgent` other than `ProcessGroupAgent`,
        # no `_default_pg` is initialized.
        if not dist.is_initialized():
            dist.init_process_group(
                backend="gloo",
                init_method=self.init_method,
                rank=self.rank,
                world_size=self.world_size,
            )
        # Wait for all init to complete.
        dist.barrier()

        rref = rpc.remote(
            "worker{}".format((self.rank + 1) % self.world_size),
            torch.add,
            args=(torch.ones(2, 2), 1)
        )

        if ignore_leak:
            import torch.distributed.rpc.api as api
            api._ignore_rref_leak = True

        rpc.shutdown()

    @dist_init(setup_rpc=False)
    def test_rref_leak(self):
        with self.assertRaisesRegex(RuntimeError, "Leaking RRef"):
            self._test_rref_leak()

    @dist_init(setup_rpc=False)
    def test_ignore_rref_leak(self):
        self._test_rref_leak(ignore_leak=True)

    @dist_init(setup_rpc=False)
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
    def test_local_shutdown(self):
        # test that we can start RPC and then immediately locally shutdown
        # without sending any messages.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=rpc.backend_registry.BackendType[
                dist_utils.TEST_CONFIG.rpc_backend_name
            ],
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )
        # pass in graceful=False to ensure that we don't wait for other workers.
        rpc.shutdown(graceful=False)

    @dist_init(setup_rpc=False)
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
    def test_local_shutdown_with_rpc(self):
        # test that we can start RPC, send RPCs, and then run local shutdown.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=rpc.backend_registry.BackendType[
                dist_utils.TEST_CONFIG.rpc_backend_name
            ],
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )
        n = self.rank + 1
        dst_rank = n % self.world_size
        ret = rpc.rpc_sync(
            "worker{}".format(dst_rank),
            torch.add,
            args=(torch.ones(n, n), torch.ones(n, n)),
        )
        # wait for RPCs to be done, so that some workers don't try to shut down
        # too early.
        rpc.rpc_sync("worker{}".format(dst_rank), _set_rpc_done, args=(1,))
        _check_rpc_done(1)
        # pass in graceful=False to ensure that we don't wait for other workers.
        rpc.shutdown(graceful=False)

    @dist_init(setup_rpc=False)
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
    def test_wait_all_workers_and_shutdown(self):
        # This test ensures that both rpc._wait_all_workers() and rpc.shutdown() can be
        # called without errors being raised due to attempting to shut down
        # multiple times.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=rpc.backend_registry.BackendType[dist_utils.TEST_CONFIG.rpc_backend_name],
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options
        )
        from torch.distributed.rpc.api import _wait_all_workers
        # intentional call to internal _wait_all_workers.
        _wait_all_workers()
        rpc.shutdown()

    @dist_init(setup_rpc=False)
    def test_get_rpc_timeout(self):
        timeout = timedelta(seconds=1)
@@ -1102,14 +1217,13 @@ class RpcTest(RpcAgentTestFixture):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=rpc_backend_options,
        )
        set_timeout = rpc.get_rpc_timeout()
        self.assertEqual(timeout, set_timeout)
        rpc.wait_all_workers()
        rpc.shutdown()

    @dist_init
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
@@ -422,6 +422,9 @@ class WorkerSpecificIterableDataset(IterableDataset):
        assert worker_info is not None
        return iter(range(self.sizes_for_all_workers[worker_info.id]))

    def __len__(self):
        return sum(self.sizes_for_all_workers)


# Inspired by https://stackoverflow.com/a/26703365
# If all workers will call `sync_once`, they will be blocked until all workers
@@ -961,8 +964,8 @@ class TestDataLoader(TestCase):
            # non-batched should not convert ints into tensors
            self.assertIsInstance(d, torch._six.int_classes)
            self.assertEqual(d, i)
        with self.assertRaisesRegex(TypeError, "Cannot determine the DataLoader length of a IterableDataset"):
            len(dataloader)  # DataLoader with iterable-style dataset should error in __len__
        # DataLoader should match len of the iterable-style dataset (if implemented)
        self.assertEqual(len(dataloader), len(dataset))

        # [no auto-batching] multiprocessing loading
        num_workers = 3
@@ -978,8 +981,26 @@ class TestDataLoader(TestCase):
            # non-batched should not convert ints into tensors
            self.assertIsInstance(a, torch._six.int_classes)
            self.assertEqual(a, b)
        with self.assertRaisesRegex(TypeError, "Cannot determine the DataLoader length of a IterableDataset"):
            len(dataloader)  # DataLoader with iterable-style dataset should error in __len__
        # DataLoader should match len of the iterable-style dataset (if implemented)
        self.assertEqual(len(dataloader), len(dataset))
        # When loading more than len(dataset) data, after accessing len(dataloader),
        # we should get a warning. See NOTE [ IterableDataset and __len__ ].
        dataset = CountingIterableDataset(20)
        dataloader = DataLoader(dataset, num_workers=num_workers,
                                worker_init_fn=set_faulthander_if_available)
        it = iter(dataloader)
        for _ in range(40):
            self.assertNotWarn(lambda: next(it), "Should not warn before accessing len(dataloader)")
        self.assertEqual(len(dataloader), len(dataset))
        self.assertEqual(len(dataloader), 20)
        it = iter(dataloader)
        for _ in range(20):
            self.assertNotWarn(lambda: next(it), "Should not warn before exceeding length")
        for _ in range(3):
            self.assertWarnsRegex(
                lambda: next(it),
                r"but [0-9]+ samples have been fetched\. For multiprocessing data-loading, this",
                "Should always warn after exceeding length")

        # [no auto-batching] test that workers exit gracefully
        workers = dataloader_iter._workers
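Context for the new assertions above (a sketch under the assumption that the helper dataset defines __len__ the way WorkerSpecificIterableDataset now does; Counting here is a hypothetical stand-in for CountingIterableDataset):

from torch.utils.data import IterableDataset, DataLoader

class Counting(IterableDataset):
    # Minimal stand-in for the CountingIterableDataset used in the tests.
    def __init__(self, n):
        self.n = n
    def __iter__(self):
        return iter(range(self.n))
    def __len__(self):
        return self.n

loader = DataLoader(Counting(20))
assert len(loader) == 20  # DataLoader now defers to the dataset's __len__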
@@ -5,7 +5,7 @@ import numpy as np
from hypothesis import given
from hypothesis import strategies as st
import hypothesis_utils as hu
from hypothesis_utils import no_deadline
hu.assert_deadline_disabled()
from common_utils import run_tests, TestCase
from torch.quantization import FakeQuantize
from torch.quantization import default_observer, default_per_channel_weight_observer
@@ -64,10 +64,8 @@ NP_RANDOM_SEED = 19
tolerance = 1e-6

class TestFakeQuantizePerTensor(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
@@ -85,7 +83,7 @@ class TestFakeQuantizePerTensor(TestCase):
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
@@ -108,7 +106,8 @@ class TestFakeQuantizePerTensor(TestCase):
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu(), X.grad.cpu().detach().numpy(), rtol=tolerance, atol=tolerance)

    @no_deadline
    # https://github.com/pytorch/pytorch/issues/30604
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
@@ -127,7 +126,7 @@ class TestFakeQuantizePerTensor(TestCase):
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=[torch.quint8])),
@@ -206,10 +205,8 @@ class TestFakeQuantizePerTensor(TestCase):


class TestFakeQuantizePerChannel(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
           qparams=hu.qparams(dtypes=torch.quint8)))
@@ -229,7 +226,7 @@ class TestFakeQuantizePerChannel(TestCase):
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
           qparams=hu.qparams(dtypes=torch.quint8)))
@@ -253,11 +250,10 @@ class TestFakeQuantizePerChannel(TestCase):
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu().detach().numpy(), X.grad.cpu().detach().numpy(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
           qparams=hu.qparams(dtypes=torch.quint8)))
    @unittest.skip("temporarily disable the test")
    def test_numerical_consistency_per_channel(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
@@ -275,7 +271,7 @@ class TestFakeQuantizePerChannel(TestCase):
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(2, 5,),
           qparams=hu.qparams(dtypes=torch.qint8)))
@@ -3564,6 +3564,38 @@ graph(%Ra, %Rb):
                self.assertTrue(type(block.paramNode()) == torch._C.Node)
        self.assertTrue(tested_blocks)

    def test_export_opnames(self):
        class Foo(torch.jit.ScriptModule):
            def __init__(self):
                super(Foo, self).__init__()

            def one(self, x, y):
                # type: (Tensor, Tensor) -> Tensor
                return x + y

            def two(self, x):
                # type: (Tensor) -> Tensor
                return 2 * x

            @torch.jit.script_method
            def forward(self, x):
                # type: (Tensor) -> Tensor
                return self.one(self.two(x), x)

        class Bar(torch.jit.ScriptModule):
            def __init__(self):
                super(Bar, self).__init__()
                self.sub = Foo()

            def forward(self, x):
                # type: (Tensor) -> Tensor
                return self.sub.forward(x)

        bar = Bar()
        ops = torch.jit.export_opnames(bar)
        expected = ['aten::add.Tensor', 'aten::mul.Scalar', 'prim::Constant']
        self.assertEqual(ops, expected)

    def test_pytorch_jit_env_off(self):
        import subprocess
        env = os.environ.copy()
@@ -7037,6 +7069,15 @@ a")
        self.checkScript(func1, (), optimize=True)
        self.checkScript(func2, (), optimize=True)

    # FIXME: get rid of this once we have actual ops using optional floats
    def test_optional_float(self):
        def _test_optional_float(x, scale):
            # type: (Tensor, Optional[float]) -> torch.Tensor
            return torch._test_optional_float(x, scale=scale)

        self.assertEqual([0], torch.jit.script(_test_optional_float)(torch.randn(()), None).shape)
        self.assertEqual((), torch.jit.script(_test_optional_float)(torch.randn(()), 2.5).shape)

    def _test_tensor_number_math(self, device='cpu'):
        template = dedent('''
        def func(t):
@@ -1038,6 +1038,11 @@ class TestNamedTensor(TestCase):
        self.assertEqual(out.names, ('N', 'C', 'H', 'W', 'K'))
        self.assertEqual(out.shape, (7, 2, 3, 5, 11))

        # takes negative positional dim
        out = tensor.unflatten(-2, (('C', 2), ('H', 3), ('W', 5)))
        self.assertEqual(out.names, ('N', 'C', 'H', 'W', 'K'))
        self.assertEqual(out.shape, (7, 2, 3, 5, 11))

        with self.assertRaisesRegex(RuntimeError, "don't multiply up to"):
            tensor.unflatten('D', (('H', 3), ('W', 5)))


@@ -6232,6 +6232,38 @@ class TestNN(NNTestCase):
        inp = torch.randn(4, 5, device='cuda', requires_grad=True)
        gradgradcheck(F.pdist, (inp,))

    def test_cosine_embedding_loss_with_diff_type(self):
        for device in device_():
            input1 = torch.tensor([[2, 3, 4], [6, 2, 4]], dtype=torch.double, device=device)
            input2 = torch.tensor([[2, 3, 5], [3, 2, 1]], dtype=torch.double, device=device)
            target = torch.tensor([1, -1], dtype=torch.int, device=device)
            expected = torch.nn.functional.cosine_embedding_loss(input1, input2, target)
            for dt1 in torch.testing.get_all_math_dtypes(device):
                for dt2 in torch.testing.get_all_math_dtypes(device):
                    for dt3 in torch.testing.get_all_math_dtypes(device):
                        # dt3 is used as dtype for target = [1, -1], so let's skip unsigned type
                        if dt3 == torch.uint8:
                            continue
                        input1 = input1.to(dt1)
                        input2 = input2.to(dt2)
                        target = target.to(dt3)
                        result = torch.nn.functional.cosine_embedding_loss(input1, input2, target)
                        self.assertEqual(result.item(), expected.item(), 0.001)

    def test_kl_div_with_diff_type(self):
        for device in device_():
            input = torch.tensor([[2, 3, 5], [3, 2, 1]], dtype=torch.double, device=device)
            target = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.double, device=device)
            expected = torch.nn.functional.kl_div(input, target)
            for input_dtype in torch.testing.get_all_math_dtypes(device):
                for target_dtype in [torch.float32, torch.float64, torch.float16]:
                    if (torch.device(device).type == 'cpu' and target_dtype == torch.float16):
                        continue
                    input = input.to(input_dtype)
                    target = target.to(target_dtype)
                    result = torch.nn.functional.kl_div(input, target)
                    self.assertEqual(result.item(), expected.item(), 0.001)

    def test_cosine_embedding_loss_no_reduce(self):
        input1 = torch.randn(15, 10, requires_grad=True)
        input2 = torch.randn(15, 10, requires_grad=True)
@@ -309,6 +309,30 @@ class TestNumbaIntegration(common.TestCase):
                torch_ary += 42
                self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary) + 42)

    @unittest.skipIf(not TEST_NUMPY, "No numpy")
    @unittest.skipIf(not TEST_CUDA, "No cuda")
    @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")
    def test_from_cuda_array_interface_inferred_strides(self):
        """torch.as_tensor(numba_ary) should have correct inferred (contiguous) strides"""
        # This could, in theory, be combined with test_from_cuda_array_interface but that test
        # is overly strict: it checks that the exported protocols are exactly the same, which
        # cannot handle differing exported protocol versions.
        dtypes = [
            numpy.float64,
            numpy.float32,
            numpy.int64,
            numpy.int32,
            numpy.int16,
            numpy.int8,
            numpy.uint8,
        ]
        for dtype in dtypes:
            numpy_ary = numpy.arange(6).reshape(2, 3).astype(dtype)
            numba_ary = numba.cuda.to_device(numpy_ary)
            self.assertTrue(numba_ary.is_c_contiguous())
            torch_ary = torch.as_tensor(numba_ary, device="cuda")
            self.assertTrue(torch_ary.is_contiguous())

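For reference, the "inferred (contiguous) strides" this test expects can be computed
from the shape alone. A minimal sketch (the helper name is ours, not the test's):

    # C-contiguous strides in bytes: the rightmost dimension varies fastest.
    def c_contiguous_strides(shape, itemsize):
        strides, acc = [], itemsize
        for dim in reversed(shape):
            strides.append(acc)
            acc *= dim
        return tuple(reversed(strides))

    assert c_contiguous_strides((2, 3), itemsize=8) == (24, 8)
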
    @unittest.skipIf(not TEST_NUMPY, "No numpy")
    @unittest.skipIf(not TEST_CUDA, "No cuda")
    @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")

@@ -11,15 +11,13 @@ import torch.backends.mkldnn
from common_utils import TestCase, run_tests
from hypothesis import given
from hypothesis import strategies as st
from hypothesis_utils import no_deadline
import hypothesis_utils as hu
hu.assert_deadline_disabled()
from functools import reduce

class IntrinsicQATModuleTest(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @given(batch_size=st.integers(2, 4),
           input_channels_per_group=st.sampled_from([2, 3, 4]),
           height=st.integers(5, 10),

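The hu.assert_deadline_disabled() idiom replacing the no_deadline import can be built
entirely on hypothesis' public settings API. A hedged sketch (not the repo's actual
hypothesis_utils):

    from hypothesis import settings

    # Register and activate a profile with no per-example deadline, so a slow
    # first example (e.g. CUDA context creation) cannot trip DeadlineExceeded.
    settings.register_profile("no_deadline", deadline=None)

    def assert_deadline_disabled():
        settings.load_profile("no_deadline")
        assert settings.default.deadline is None
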
@@ -42,7 +42,8 @@ from jit_utils import get_forward

from hypothesis import given
from hypothesis import strategies as st
from hypothesis_utils import no_deadline
import hypothesis_utils as hu
hu.assert_deadline_disabled()
import io
import copy

@@ -50,7 +51,6 @@ import copy
                     " Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs"
                     " with instruction set support avx2 or newer.")
class EagerModePostTrainingQuantTest(QuantizationTestCase):
    @no_deadline
    @given(qconfig=st.sampled_from((torch.quantization.default_qconfig, torch.quantization.default_per_channel_qconfig)))
    def test_single_layer(self, qconfig):
        r"""Quantize SingleLayerLinearModel which has one Linear module, make sure it is swapped
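In outline, the eager-mode post-training flow this test exercises looks as follows
(the toy module is illustrative, not the test's fixture):

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(8, 8))
    model.qconfig = torch.quantization.default_qconfig
    torch.quantization.prepare(model, inplace=True)   # insert observers
    model(torch.randn(4, 8))                          # calibrate on sample data
    torch.quantization.convert(model, inplace=True)   # swap in quantized modules
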
@@ -919,7 +919,6 @@ class GraphModePostTrainingQuantTest(QuantizationTestCase):

class FunctionalModuleTest(QuantizationTestCase):
    # Histogram Observers are slow, so have no-deadline to ensure test doesn't time out
    @no_deadline
    @given(train_mode=st.booleans())
    def test_functional_module(self, train_mode):
        model = ModelWithFunctionals()
@@ -1349,7 +1348,6 @@ class RecordHistogramObserverTest(QuantizationTestCase):
        self.assertEqual(len(observer_dict['fc1.module.activation_post_process'].get_tensor_value()), 2 * len(self.calib_data))
        self.assertEqual(observer_dict['fc1.module.activation_post_process'].get_tensor_value()[0], model(self.calib_data[0][0]))

    @no_deadline
    @given(qdtype=st.sampled_from((torch.qint8, torch.quint8)),
           qscheme=st.sampled_from((torch.per_tensor_affine, torch.per_tensor_symmetric)))
    def test_observer_scriptable(self, qdtype, qscheme):
@@ -1366,7 +1364,6 @@ class RecordHistogramObserverTest(QuantizationTestCase):
        loaded = torch.jit.load(buf)
        self.assertTrue(torch.equal(obs.get_tensor_value()[0], loaded.get_tensor_value()[0]))

    @no_deadline
    @given(qdtype=st.sampled_from((torch.qint8, torch.quint8)),
           qscheme=st.sampled_from((torch.per_tensor_affine, torch.per_tensor_symmetric)),
           reduce_range=st.booleans())

@@ -10,7 +10,7 @@ from hypothesis import settings, HealthCheck
from hypothesis import assume, given
from hypothesis import strategies as st
import hypothesis_utils as hu
from hypothesis_utils import no_deadline
hu.assert_deadline_disabled()

from common_utils import TEST_WITH_UBSAN, TestCase, run_tests, IS_PPC, IS_MACOS
from common_quantized import _quantize, _dequantize, _calculate_dynamic_qparams, \
@@ -145,7 +145,6 @@ class TestQuantizedOps(TestCase):
                                 message="{} relu failed".format(name))

    """Tests the correctness of the scalar addition."""
    @no_deadline
    @given(A=hu.tensor(shapes=hu.array_shapes(1, 4, 1, 5),
                       elements=st.floats(-1e6, 1e6, allow_nan=False),
                       qparams=hu.qparams()),
@@ -506,7 +505,6 @@ class TestQuantizedOps(TestCase):
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=5, max_side=10),
                       qparams=hu.qparams(dtypes=torch.quint8)),
@@ -556,7 +554,6 @@ class TestQuantizedOps(TestCase):
                             message=error_message.format(name + '.zero_point', scale,
                                                          qX_hat.q_zero_point()))

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=5, max_side=10),
                       qparams=hu.qparams(dtypes=torch.qint8)),
@@ -619,7 +616,6 @@ class TestQuantizedOps(TestCase):
                             message=error_message.format(name + '.zero_point', scale,
                             X_hat.q_zero_point()))

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams(dtypes=torch.quint8)),
@@ -662,7 +658,6 @@ class TestQuantizedOps(TestCase):
                                                          qX_hat.q_zero_point()))

    """Tests adaptive average pool operation on NHWC quantized tensors."""
    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams(dtypes=torch.qint8)),
@@ -708,7 +703,6 @@ class TestQuantizedOps(TestCase):
                             message=error_message.format(name + '.zero_point', scale,
                                                          X_hat.q_zero_point()))

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
@@ -733,7 +727,6 @@ class TestQuantizedOps(TestCase):
        torch.testing.assert_allclose(quantized_out[0].dequantize(), unquantized_out[0])
        torch.testing.assert_allclose(quantized_out[1], unquantized_out[1])

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
@@ -818,7 +811,6 @@ class TestQuantizedOps(TestCase):
            cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale,
                             zero_point=zero_point)

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=5, max_side=10),
                       qparams=hu.qparams()),
@@ -874,7 +866,6 @@ class TestQuantizedOps(TestCase):
                                                          qX_hat.q_zero_point()))

    """Tests quantize concatenation (both fused and not)."""
 | 
			
		||||
    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
@@ -999,7 +990,6 @@ class TestQuantizedOps(TestCase):
                     " with instruction set support avx2 or newer.")
class TestDynamicQuantizedLinear(TestCase):
    """Tests the correctness of the dynamic quantized linear and linear_relu op."""
    @no_deadline
    @given(
        batch_size=st.integers(1, 4),
        input_channels=st.integers(16, 32),
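For orientation, an end-user view of the dynamic quantized linear family exercised
above, via the public entry point (the toy model is illustrative):

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(16, 4))
    qmodel = torch.quantization.quantize_dynamic(
        model, {torch.nn.Linear}, dtype=torch.qint8)  # qint8 weights, fp32 activations
    out = qmodel(torch.randn(2, 16))
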
@@ -1112,7 +1102,6 @@ class TestDynamicQuantizedLinear(TestCase):
                         message="torch.ops.quantized.linear_dynamic (fbgemm) results are off")

    """Tests the correctness of the legacy dynamic quantized linear op."""
    @no_deadline
    @given(
        batch_size=st.integers(1, 4),
        input_channels=st.integers(16, 32),
@@ -1189,7 +1178,6 @@ class TestDynamicQuantizedLinear(TestCase):

class TestQuantizedLinear(unittest.TestCase):
    """Tests the correctness of the quantized linear and linear_relu op."""
    @no_deadline
    @given(batch_size=st.integers(1, 4),
           input_channels=st.integers(16, 32),
           output_channels=st.integers(4, 8),

@@ -13,7 +13,8 @@ from common_quantized import _calculate_dynamic_qparams, override_quantized_engi
from common_utils import run_tests, IS_PPC, TEST_WITH_UBSAN
from hypothesis import assume, given
from hypothesis import strategies as st
from hypothesis_utils import no_deadline
import hypothesis_utils as hu
hu.assert_deadline_disabled()

import io
import numpy as np
@@ -127,7 +128,6 @@ class FunctionalAPITest(QuantizationTestCase):

    @no_deadline
    @given(batch_size=st.integers(1, 3),
           in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           H=st.integers(4, 16),
@@ -181,7 +181,6 @@ class FunctionalAPITest(QuantizationTestCase):
                W_scale, W_zero_point, Y_scale, Y_zero_point, use_bias,
                use_channelwise)

    @no_deadline
    @given(batch_size=st.integers(1, 3),
           in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           D=st.integers(4, 8),
@@ -239,7 +238,6 @@ class FunctionalAPITest(QuantizationTestCase):

class DynamicModuleAPITest(QuantizationTestCase):
    @no_deadline
    @unittest.skipUnless('fbgemm' in torch.backends.quantized.supported_engines,
                         " Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs"
                         " with instruction set support avx2 or newer.")
@@ -357,7 +355,6 @@ class ModuleAPITest(QuantizationTestCase):
                         message="ReLU6 module API failed")

    @no_deadline
    @given(
        batch_size=st.integers(1, 5),
        in_features=st.integers(16, 32),
@@ -421,7 +418,6 @@ class ModuleAPITest(QuantizationTestCase):
            self.assertEqual(Z_ref, Z_q)

            # Test serialization of quantized Linear Module using state_dict

            model_dict = qlinear.state_dict()
            self.assertEqual(model_dict['_packed_params.weight'], W_q)
            if use_bias:
@@ -647,7 +643,6 @@ class ModuleAPITest(QuantizationTestCase):
        # Smoke test extra_repr
        self.assertTrue(module_name in str(converted_qconv_module))

    @no_deadline
    @given(batch_size=st.integers(1, 3),
           in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           H=st.integers(4, 16),

@@ -763,6 +763,45 @@ class _TestTorchMixin(object):
            res = torch.where(a > 0)
            self.assertEqual(1, len(res))

    def test_where_tensor(self):
        def rand_tensor(size, dtype, device):
            if dtype.is_floating_point:
                return torch.rand(size=size, dtype=dtype, device=device)
            elif dtype == torch.uint8:
                return torch.randint(1, 5, size=size, dtype=dtype, device=device)
            elif dtype == torch.bool:
                return torch.randint(0, 2, size=size, dtype=dtype, device=device).bool()
            else:
                return torch.randint(-5, 5, size=size, dtype=dtype, device=device)

        def get_tensor(size, dtype, device, contiguous):
            if not contiguous and len(size) < 2:
                raise RuntimeError("Unable to generate non contiguous tensor with size < 2")
            t = rand_tensor(size, dtype, device)
            if contiguous:
                return t
            else:
                return t.transpose(0, 1)

        height = 5
        width = 5
        for device in torch.testing.get_all_device_types():
            for dt1 in torch.testing.get_all_math_dtypes(device):
                for dt2 in torch.testing.get_all_math_dtypes(device):
                    for contiguous in [True, False]:
                        x1 = get_tensor((height, width), dt1, device, contiguous)
                        x2 = get_tensor((height, width), dt2, device, contiguous)
                        if dt1 != dt2:
                            self.assertRaisesRegex(RuntimeError, "expected scalar type", lambda: torch.where(x1 == 1, x1, x2))
                        else:
                            if x1.is_floating_point():
                                condition = (x1 < 0.5)
                            else:
                                condition = (x1 == 1)
                            expected = condition.to(x1.dtype) * x1 + (~condition).to(x2.dtype) * x2
                            result = torch.where(condition, x1, x2)
                            self.assertEqual(expected, result)

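The `expected` line above relies on the identity where(c, x, y) == c*x + (~c)*y for
same-dtype operands. A minimal standalone check of that identity:

    import torch

    x = torch.tensor([1., 2., 3.])
    y = torch.tensor([10., 20., 30.])
    cond = x < 2.5
    masked = cond.to(x.dtype) * x + (~cond).to(y.dtype) * y
    assert torch.equal(torch.where(cond, x, y), masked)  # tensor([1., 2., 30.])
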
    def test_all_any_with_dim(self):
        def test(x):
            r1 = x.prod(dim=0, keepdim=False).byte()
@@ -1772,6 +1811,13 @@ class _TestTorchMixin(object):
        x = torch.tensor(2., requires_grad=True)
        self.assertRaises(Exception, lambda: y.addcmul(y, y, value=x))

    # FIXME: get rid of this once we have actual ops using optional floats
    def test_optional_floats(self):
        x = torch.randn(())
        self.assertEqual(torch._test_optional_float(x), torch.empty((0,)))
        self.assertEqual(torch._test_optional_float(x, scale=None), torch.empty((0,)))
        self.assertEqual(torch._test_optional_float(x, scale=2.5), torch.full((), 2.5))

    def test_copy_broadcast(self):
        torch.zeros(5, 6).copy_(torch.zeros(6))
        self.assertRaises(RuntimeError, lambda: torch.zeros(5, 6).copy_(torch.zeros(30)))
@@ -13661,23 +13707,69 @@ class TestTorchDeviceType(TestCase):
        result = torch.cat(concat_list)
        self.assertEqual(result.size(0), SIZE1 + SIZE2)

# NOTE [Linspace+Logspace precision override]
# Our Linspace and logspace torch.half CUDA kernels are not very precise.
# Since linspace/logspace are deterministic, we can compute an expected
# amount of error (by testing without a precision override), adding a tiny
# amount (EPS) to that, and using that value as the override.
LINSPACE_LOGSPACE_EXTRA_EPS = 1e-5

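A hedged sketch of how such an override could be derived offline (this script is ours,
not part of the suite, and needs a CUDA device):

    import torch

    ref = torch.linspace(0, 10, 50, dtype=torch.float64)
    half = torch.linspace(0, 10, 50, dtype=torch.half, device='cuda').cpu().double()
    max_err = (half - ref).abs().max().item()
    override = max_err + 1e-5  # observed error plus the tiny EPS described above
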
# Tests that compare a device's computation with the (gold-standard) CPU's.
class TestDevicePrecision(TestCase):
    def test_linspace(self, device):
        a = torch.linspace(0, 10, 10, device=device)
        b = torch.linspace(0, 10, 10)

    # The implementation of linspace+logspace goes through a different path
    # when the steps arg is equal to 0 or 1. For other values of `steps`
    # they call specialized linspace (or logspace) kernels.
    LINSPACE_LOGSPACE_SPECIAL_STEPS = [0, 1]

    def _test_linspace(self, device, dtype, steps):
        a = torch.linspace(0, 10, steps=steps, dtype=dtype, device=device)
        b = torch.linspace(0, 10, steps=steps)
        self.assertEqual(a, b)

    @dtypes(torch.double)
    # See NOTE [Linspace+Logspace precision override]
    @precisionOverride({torch.half: 0.0039 + LINSPACE_LOGSPACE_EXTRA_EPS})
    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_linspace(self, device, dtype):
        self._test_linspace(device, dtype, steps=10)

    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_linspace_special_steps(self, device, dtype):
        for steps in self.LINSPACE_LOGSPACE_SPECIAL_STEPS:
            self._test_linspace(device, dtype, steps=steps)

    def _test_logspace(self, device, dtype, steps):
        a = torch.logspace(1, 1.1, steps=steps, dtype=dtype, device=device)
        b = torch.logspace(1, 1.1, steps=steps)
        self.assertEqual(a, b)

    def _test_logspace_base2(self, device, dtype, steps):
        a = torch.logspace(1, 1.1, steps=steps, base=2, dtype=dtype, device=device)
        b = torch.logspace(1, 1.1, steps=steps, base=2)
        self.assertEqual(a, b)

    # See NOTE [Linspace+Logspace precision override]
    @precisionOverride({torch.half: 0.0157 + LINSPACE_LOGSPACE_EXTRA_EPS})
    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_logspace(self, device, dtype):
        a = torch.logspace(1, 10, 10, dtype=dtype, device=device)
        b = torch.logspace(1, 10, 10, dtype=dtype, device='cpu')
        self.assertEqual(a, b)
        self._test_logspace(device, dtype, steps=10)

        # Check non-default base=2
        a = torch.logspace(1, 10, 10, 2, dtype=dtype, device=device)
        b = torch.logspace(1, 10, 10, 2, dtype=dtype, device='cpu')
        self.assertEqual(a, b)
    # See NOTE [Linspace+Logspace precision override]
    @precisionOverride({torch.half: 0.00201 + LINSPACE_LOGSPACE_EXTRA_EPS})
    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_logspace_base2(self, device, dtype):
        self._test_logspace_base2(device, dtype, steps=10)

    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_logspace_special_steps(self, device, dtype):
        for steps in self.LINSPACE_LOGSPACE_SPECIAL_STEPS:
            self._test_logspace(device, dtype, steps=steps)
            self._test_logspace_base2(device, dtype, steps=steps)

    # Note: ROCm fails when using float tensors
    @dtypes(torch.double)

@@ -328,6 +328,7 @@ def create_python_bindings(python_functions, has_self, is_module=False):
        'c10::optional<Scalar>': 'scalarOptional',
        'c10::optional<int64_t>': 'toInt64Optional',
        'c10::optional<bool>': 'toBoolOptional',
        'c10::optional<double>': 'toDoubleOptional',
        'IntArrayRef': 'intlist',
        'int64_t': 'toInt64',
        'bool': 'toBool',

@@ -63,6 +63,7 @@ TYPE_MAP = {
    'int64_t': 'int',
    'int64_t?': 'int?',
    'double': 'float',
    'double?': 'float?',
    'bool': 'bool',
    'bool?': 'bool?',
    'Generator': 'Generator?',
@@ -115,6 +116,7 @@ FROM_IVALUE = {
    'bool': '{}.toBool()',
    'bool?': '{}.toOptional<bool>()',
    'double': '{}.toDouble()',
    'double?': '{}.toOptional<double>()',
    'int64_t': '{}.toInt()',
    'int64_t?': '{}.toOptional<int64_t>()',
    'std::string': '{}.toStringRef()',

@@ -3751,25 +3751,37 @@ add_docstr(torch.nonzero,
           r"""
nonzero(input, *, out=None, as_tuple=False) -> LongTensor or tuple of LongTensors

**When** :attr:`as_tuple` **is false or unspecified:**
.. note::
    :func:`torch.nonzero(..., as_tuple=False) <torch.nonzero>` (default) returns a
    2-D tensor where each row is the index for a nonzero value.

    :func:`torch.nonzero(..., as_tuple=True) <torch.nonzero>` returns a tuple of 1-D
    index tensors, allowing for advanced indexing, so ``x[x.nonzero(as_tuple=True)]``
    gives all nonzero values of tensor ``x``. Of the returned tuple, each index tensor
    contains nonzero indices for a certain dimension.

    See below for more details on the two behaviors.

**When** :attr:`as_tuple` **is ``False`` (default)**:

Returns a tensor containing the indices of all non-zero elements of
:attr:`input`.  Each row in the result contains the indices of a non-zero
element in :attr:`input`. The result is sorted lexicographically, with
the last index changing the fastest (C-style).

If :attr:`input` has `n` dimensions, then the resulting indices tensor
If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor
:attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of
non-zero elements in the :attr:`input` tensor.

**When** :attr:`as_tuple` **is true:**
**When** :attr:`as_tuple` **is ``True``**:

Returns a tuple of 1-D tensors, one for each dimension in :attr:`input`,
each containing the indices (in that dimension) of all non-zero elements of
:attr:`input` .

If :attr:`input` has `n` dimensions, then the resulting tuple contains `n` tensors
of size `z`, where `z` is the total number of
If :attr:`input` has :math:`n` dimensions, then the resulting tuple contains :math:`n`
tensors of size :math:`z`, where :math:`z` is the total number of
non-zero elements in the :attr:`input` tensor.

As a special case, when :attr:`input` has zero dimensions and a nonzero scalar
@@ -3780,8 +3792,8 @@ Args:
    out (LongTensor, optional): the output tensor containing indices

Returns:
    LongTensor or tuple of LongTensor: If :attr:`as_tuple` is false, the output
    tensor containing indices. If :attr:`as_tuple` is true, one 1-D tensor for
    LongTensor or tuple of LongTensor: If :attr:`as_tuple` is ``False``, the output
    tensor containing indices. If :attr:`as_tuple` is ``True``, one 1-D tensor for
    each dimension, containing the indices of each nonzero element along that
    dimension.

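A short usage example consistent with the two behaviors documented above:

    import torch

    x = torch.tensor([[0.6, 0.0], [0.0, 0.4]])
    torch.nonzero(x)                              # 2-D: tensor([[0, 0], [1, 1]])
    rows, cols = torch.nonzero(x, as_tuple=True)  # tuple of 1-D index tensors
    assert torch.equal(x[rows, cols], torch.tensor([0.6, 0.4]))
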
@@ -5199,6 +5211,9 @@ i.e., if the last two dimensions of :attr:`input` are ``m`` and ``n``, then the
If :attr:`compute_uv` is ``False``, the returned `U` and `V` matrices will be zero matrices
of shape :math:`(m \times m)` and :math:`(n \times n)` respectively. :attr:`some` will be ignored here.

.. note:: The singular values are returned in descending order. If :attr:`input` is a batch of matrices,
          then the singular values of each matrix in the batch are returned in descending order.

.. note:: The implementation of SVD on CPU uses the LAPACK routine `?gesdd` (a divide-and-conquer
          algorithm) instead of `?gesvd` for speed. Analogously, the SVD on GPU uses the MAGMA routine
          `gesdd` as well.
@@ -5279,6 +5294,9 @@ only the upper triangular portion is used by default.

If :attr:`upper` is ``False``, then lower triangular portion is used.

.. note:: The eigenvalues are returned in ascending order. If :attr:`input` is a batch of matrices,
          then the eigenvalues of each matrix in the batch are returned in ascending order.

.. note:: Irrespective of the original strides, the returned matrix `V` will
          be transposed, i.e. with strides `V.contiguous().transpose(-1, -2).stride()`.

@@ -5782,7 +5800,7 @@ The upper triangular part of the matrix is defined as the elements on and
above the diagonal.

The argument :attr:`diagonal` controls which diagonal to consider. If
:attr:`diagonal` = 0, all elements on and below the main diagonal are
:attr:`diagonal` = 0, all elements on and above the main diagonal are
retained. A positive value excludes just as many diagonals above the main
diagonal, and similarly a negative value includes just as many diagonals below
the main diagonal. The main diagonal is the set of indices

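Illustrating the corrected :attr:`diagonal` semantics on a small matrix:

    import torch

    x = torch.ones(3, 3)
    torch.triu(x, diagonal=0)   # keeps the main diagonal and everything above it
    torch.triu(x, diagonal=1)   # a positive value also excludes the main diagonal
    torch.triu(x, diagonal=-1)  # a negative value keeps one diagonal below as well
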
@@ -22,6 +22,8 @@ namespace datasets {
template <typename ExampleType_, typename ChunkType_ = std::vector<ExampleType_>>
class ChunkDataReader {
 public:
  virtual ~ChunkDataReader() = default;

  using ChunkType = ChunkType_;
  using ExampleType = ExampleType_;

@@ -47,7 +47,7 @@ class Cloneable : public virtual Module {
        "parameters as the original module after calling reset(). "
        "Are you sure you called register_parameter() inside reset() "
        "and not the constructor?");
    for (const auto& parameter : parameters_) {
    for (const auto& parameter : named_parameters(/*recurse=*/false)) {
      auto& tensor = *parameter;
      auto data = device && tensor.device() != *device ?
          tensor.to(*device) : autograd::Variable(tensor).clone();
@@ -59,7 +59,7 @@ class Cloneable : public virtual Module {
        "buffers as the original module after calling reset(). "
        "Are you sure you called register_buffer() inside reset() "
        "and not the constructor?");
    for (const auto& buffer : buffers_) {
    for (const auto& buffer : named_buffers(/*recurse=*/false)) {
      auto& tensor = *buffer;
      auto data = device && tensor.device() != *device ?
          tensor.to(*device) : autograd::Variable(tensor).clone();
@@ -648,11 +648,11 @@ void Module::to_impl(Ts&&... ts) {
    child.value()->to(ts...);
  }
  // Then move every parameter to the new dtype/device.
  for (auto& parameter : parameters_) {
  for (auto& parameter : named_parameters(/*recurse=*/false)) {
    parameter->set_data(autograd::Variable(*parameter).to(ts...));
  }
  // Then move every buffer to the new dtype/device.
  for (auto& buffer : buffers_) {
  for (auto& buffer : named_buffers(/*recurse=*/false)) {
    buffer->set_data(autograd::Variable(*buffer).to(ts...));
  }
}

@@ -9,8 +9,6 @@

#include <cstdint>

namespace F = torch::nn::functional;

namespace torch {
namespace nn {

@@ -178,7 +176,7 @@ class BatchNormImplBase : public NormImplBase<D, Derived, BatchNormOptions> {
      }
    }

    return F::detail::batch_norm(
    return torch::nn::functional::detail::batch_norm(
        input,
        this->running_mean,
        this->running_var,

@@ -17,9 +17,9 @@ namespace nn {

/// Base class for all (dimension-specialized) convolution modules.
template <size_t D, typename Derived>
class ConvImpl : public torch::nn::Cloneable<Derived> {
class ConvNdImpl : public torch::nn::Cloneable<Derived> {
 public:
  explicit ConvImpl(ConvOptions<D> options_) : options(std::move(options_)) {
  explicit ConvNdImpl(detail::ConvNdOptions<D> options_) : options(std::move(options_)) {
    reset();
  }

@@ -98,7 +98,7 @@ class ConvImpl : public torch::nn::Cloneable<Derived> {
  }

  /// The options with which this `Module` was constructed.
  ConvOptions<D> options;
  detail::ConvNdOptions<D> options;

  /// The learned kernel (or "weight").
  Tensor weight;
@@ -112,15 +112,15 @@ class ConvImpl : public torch::nn::Cloneable<Derived> {
/// Applies convolution over a 1-D input.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv1d to learn about
/// the exact behavior of this module.
class TORCH_API Conv1dImpl : public ConvImpl<1, Conv1dImpl> {
class TORCH_API Conv1dImpl : public ConvNdImpl<1, Conv1dImpl> {
 public:
  Conv1dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<1> kernel_size)
      : Conv1dImpl(ConvOptions<1>(input_channels, output_channels, kernel_size)) {
      : Conv1dImpl(Conv1dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit Conv1dImpl(ConvOptions<1> options_);
  explicit Conv1dImpl(Conv1dOptions options_);
  Tensor forward(const Tensor& input);
};

@@ -135,15 +135,15 @@ TORCH_MODULE(Conv1d);
/// Applies convolution over a 2-D input.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d to learn about
/// the exact behavior of this module.
class TORCH_API Conv2dImpl : public ConvImpl<2, Conv2dImpl> {
class TORCH_API Conv2dImpl : public ConvNdImpl<2, Conv2dImpl> {
 public:
  Conv2dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<2> kernel_size)
      : Conv2dImpl(ConvOptions<2>(input_channels, output_channels, kernel_size)) {
      : Conv2dImpl(Conv2dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit Conv2dImpl(ConvOptions<2> options_);
  explicit Conv2dImpl(Conv2dOptions options_);
  Tensor forward(const Tensor& input);
};

@@ -158,15 +158,15 @@ TORCH_MODULE(Conv2d);
/// Applies convolution over a 3-D input.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv3d to learn about
/// the exact behavior of this module.
class TORCH_API Conv3dImpl : public ConvImpl<3, Conv3dImpl> {
class TORCH_API Conv3dImpl : public ConvNdImpl<3, Conv3dImpl> {
 public:
  Conv3dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<3> kernel_size)
      : Conv3dImpl(ConvOptions<3>(input_channels, output_channels, kernel_size)) {
      : Conv3dImpl(Conv3dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit Conv3dImpl(ConvOptions<3> options_);
  explicit Conv3dImpl(Conv3dOptions options_);
  Tensor forward(const Tensor& input);
};

@@ -180,9 +180,9 @@ TORCH_MODULE(Conv3d);

/// Base class for all (dimension-specialized) convolution transpose modules.
template <size_t D, typename Derived>
class ConvTransposeImpl : public ConvImpl<D, Derived> {
class ConvTransposeNdImpl : public ConvNdImpl<D, Derived> {
 public:
  using torch::nn::ConvImpl<D, Derived>::ConvImpl;
  using torch::nn::ConvNdImpl<D, Derived>::ConvNdImpl;

  /// Pretty prints the `ConvTranspose{1,2,3}d` module into the given `stream`.
  void pretty_print(std::ostream& stream) const override {
@@ -224,15 +224,15 @@ class ConvTransposeImpl : public ConvImpl<D, Derived> {
/// Applies the ConvTranspose1d function.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose1d to
/// learn about the exact behavior of this module.
class TORCH_API ConvTranspose1dImpl : public ConvTransposeImpl<1, ConvTranspose1dImpl> {
class TORCH_API ConvTranspose1dImpl : public ConvTransposeNdImpl<1, ConvTranspose1dImpl> {
 public:
  ConvTranspose1dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<1> kernel_size)
      : ConvTranspose1dImpl(ConvTransposeOptions<1>(input_channels, output_channels, kernel_size)) {
      : ConvTranspose1dImpl(ConvTranspose1dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit ConvTranspose1dImpl(ConvTransposeOptions<1> options_);
  explicit ConvTranspose1dImpl(ConvTranspose1dOptions options_);
  Tensor forward(const Tensor& input,
                 const c10::optional<at::IntArrayRef>& output_size = c10::nullopt);
};
@@ -244,15 +244,15 @@ TORCH_MODULE(ConvTranspose1d);
/// Applies the ConvTranspose2d function.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose2d to
/// learn about the exact behavior of this module.
class TORCH_API ConvTranspose2dImpl : public ConvTransposeImpl<2, ConvTranspose2dImpl> {
class TORCH_API ConvTranspose2dImpl : public ConvTransposeNdImpl<2, ConvTranspose2dImpl> {
 public:
  ConvTranspose2dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<2> kernel_size)
      : ConvTranspose2dImpl(ConvTransposeOptions<2>(input_channels, output_channels, kernel_size)) {
      : ConvTranspose2dImpl(ConvTranspose2dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit ConvTranspose2dImpl(ConvTransposeOptions<2> options_);
  explicit ConvTranspose2dImpl(ConvTranspose2dOptions options_);
  Tensor forward(const Tensor& input,
                 const c10::optional<at::IntArrayRef>& output_size = c10::nullopt);
};
@@ -264,15 +264,15 @@ TORCH_MODULE(ConvTranspose2d);
/// Applies the ConvTranspose3d function.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose3d to
/// learn about the exact behavior of this module.
class TORCH_API ConvTranspose3dImpl : public ConvTransposeImpl<3, ConvTranspose3dImpl> {
class TORCH_API ConvTranspose3dImpl : public ConvTransposeNdImpl<3, ConvTranspose3dImpl> {
 public:
  ConvTranspose3dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<3> kernel_size)
      : ConvTranspose3dImpl(ConvTransposeOptions<3>(input_channels, output_channels, kernel_size)) {
      : ConvTranspose3dImpl(ConvTranspose3dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit ConvTranspose3dImpl(ConvTransposeOptions<3> options_);
  explicit ConvTranspose3dImpl(ConvTranspose3dOptions options_);
  Tensor forward(const Tensor& input,
                 const c10::optional<at::IntArrayRef>& output_size = c10::nullopt);
};

@@ -14,7 +14,7 @@ class InstanceNormImpl : public torch::nn::NormImplBase<D, Derived, InstanceNorm

  Tensor forward(const Tensor& input) {
    this->_check_input_dim(input);
    return F::detail::instance_norm(
    return torch::nn::functional::detail::instance_norm(
      input, this->running_mean, this->running_var, this->weight, this->bias,
      this->is_training() || !this->options.track_running_stats(), this->options.momentum(), this->options.eps());
  }

@@ -9,12 +9,14 @@
namespace torch {
namespace nn {

/// Options for a `D`-dimensional convolution module.
template <size_t D>
struct ConvOptions {
  typedef c10::variant<enumtype::kZeros, enumtype::kCircular> padding_mode_t;
namespace detail {

  ConvOptions(
typedef c10::variant<enumtype::kZeros, enumtype::kCircular> conv_padding_mode_t;

/// Options for a `D`-dimensional convolution or convolution transpose module.
template <size_t D>
struct ConvNdOptions {
  ConvNdOptions(
      int64_t in_channels,
      int64_t out_channels,
      ExpandingArray<D> kernel_size) :
@@ -73,6 +75,67 @@ struct ConvOptions {
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(bool, bias) = true;

  /// Accepted values: `zeros` and `circular`. Default: `zeros`.
  TORCH_ARG(conv_padding_mode_t, padding_mode) = torch::kZeros;
};

} // namespace detail

// ============================================================================

/// Options for a `D`-dimensional convolution module.
template <size_t D>
struct ConvOptions {
  using padding_mode_t = detail::conv_padding_mode_t;

  ConvOptions(
      int64_t in_channels,
      int64_t out_channels,
      ExpandingArray<D> kernel_size) :
                in_channels_(in_channels),
                out_channels_(out_channels),
                kernel_size_(std::move(kernel_size)) {}

  /// The number of channels the input volumes will have.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, in_channels);

  /// The number of output channels the convolution should produce.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, out_channels);

  /// The kernel size to use.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, kernel_size);

  /// The stride of the convolution.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, stride) = 1;

  /// The padding to add to the input volumes.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, padding) = 0;

  /// The kernel dilation.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, dilation) = 1;

  /// The number of convolution groups.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(int64_t, groups) = 1;

  /// Whether to add a bias after individual applications of the kernel.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(bool, bias) = true;

  /// Accepted values: `zeros` and `circular`. Default: `zeros`.
  TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros;
};
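For orientation, these options mirror the Python constructor one-to-one; each
TORCH_ARG above corresponds to a keyword argument there:

    import torch.nn as nn

    conv = nn.Conv1d(in_channels=16, out_channels=33, kernel_size=3,
                     stride=2, padding=1, dilation=1, groups=1,
                     bias=True, padding_mode='zeros')
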
@@ -129,8 +192,67 @@ using Conv3dFuncOptions = ConvFuncOptions<3>;

// ============================================================================

template<size_t D>
using ConvTransposeOptions = ConvOptions<D>;
template <size_t D>
struct ConvTransposeOptions {
  using padding_mode_t = detail::conv_padding_mode_t;

  ConvTransposeOptions(
      int64_t in_channels,
      int64_t out_channels,
      ExpandingArray<D> kernel_size) :
                in_channels_(in_channels),
                out_channels_(out_channels),
                kernel_size_(std::move(kernel_size)) {}

  /// The number of channels the input volumes will have.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, in_channels);

  /// The number of output channels the convolution should produce.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, out_channels);

  /// The kernel size to use.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, kernel_size);

  /// The stride of the convolution.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, stride) = 1;

  /// The padding to add to the input volumes.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, padding) = 0;

  /// For transpose convolutions, the padding to add to output volumes.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, output_padding) = 0;

  /// The number of convolution groups.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(int64_t, groups) = 1;

  /// Whether to add a bias after individual applications of the kernel.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(bool, bias) = true;

  /// The kernel dilation.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, dilation) = 1;

  /// Accepted values: `zeros` and `circular`. Default: `zeros`.
  TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros;
};

/// `ConvTransposeOptions` specialized for 1-D convolution.
using ConvTranspose1dOptions = ConvTransposeOptions<1>;

@@ -100,7 +100,7 @@ void replicate_grad_edges(
    const std::vector<std::shared_ptr<ModuleType>>& replicas,
    const std::vector<Device>& devices) {

  for (auto& parameter : module->parameters_) {
  for (auto& parameter : module->named_parameters(/*recurse=*/false)) {
    auto grad_fn = std::make_shared<ReduceAdd>((*parameter).device());
    grad_fn->set_next_edges(autograd::collect_next_edges(*parameter));

@@ -109,7 +109,7 @@ void replicate_grad_edges(
    }
  }

  for (auto& buffer : module->buffers_) {
  for (auto& buffer : module->named_buffers(/*recurse=*/false)) {
    if (buffer.value().requires_grad()){
      auto grad_fn = std::make_shared<ReduceAdd>((*buffer).device());
      grad_fn->set_next_edges(autograd::collect_next_edges(*buffer));

@@ -32,15 +32,6 @@ std::string join_name(const std::string& name_prefix, const std::string& name) {
  full_name += name;
  return full_name;
}

void extend(
    std::vector<Tensor>& vector,
    const OrderedDict<std::string, Tensor>& dict) {
  vector.reserve(vector.size() + dict.size());
  for (const auto& item : dict) {
    vector.push_back(item.value());
  }
}
} // namespace

Module::Module()
@@ -141,46 +132,48 @@ void Module::apply(
}

std::vector<Tensor> Module::parameters(bool recurse) const {
  if (!recurse) {
    return parameters_.values();
  }
  std::vector<Tensor> result;
  apply(
      [&result](const Module& module) { extend(result, module.parameters_); });
  return result;
  return named_parameters(recurse).values();
}

OrderedDict<std::string, Tensor> Module::named_parameters(bool recurse) const {
  if (!recurse) {
    return parameters_;
  }
  OrderedDict<std::string, Tensor> result;
  apply([&result](const std::string& name, const Module& module) {
    for (const auto& parameter : module.parameters_) {
      result.insert(join_name(name, parameter.key()), parameter.value());
  if (!recurse) {
    for (const auto& parameter : parameters_) {
      if (parameter.value().defined()) {
        result.insert(parameter.key(), parameter.value());
      }
    }
  });
  } else {
    apply([&result](const std::string& name, const Module& module) {
      for (const auto& parameter : module.named_parameters(/*recurse=*/false)) {
        TORCH_INTERNAL_ASSERT(parameter.value().defined());
        result.insert(join_name(name, parameter.key()), parameter.value());
      }
    });
  }
  return result;
}

std::vector<Tensor> Module::buffers(bool recurse) const {
  if (!recurse) {
    return buffers_.values();
  }
  std::vector<Tensor> result;
  apply([&result](const Module& module) { extend(result, module.buffers_); });
  return result;
  return named_buffers(recurse).values();
}

OrderedDict<std::string, Tensor> Module::named_buffers(bool recurse) const {
  if (!recurse) {
    return buffers_;
  }
  OrderedDict<std::string, Tensor> result;
  apply([&result](const std::string& name, const Module& module) {
    for (const auto& buffer : module.buffers_) {
      result.insert(join_name(name, buffer.key()), buffer.value());
  if (!recurse) {
    for (const auto& buffer : buffers_) {
      if (buffer.value().defined()) {
        result.insert(buffer.key(), buffer.value());
      }
    }
  });
  } else {
    apply([&result](const std::string& name, const Module& module) {
      for (const auto& buffer : module.named_buffers(/*recurse=*/false)) {
        TORCH_INTERNAL_ASSERT(buffer.value().defined());
        result.insert(join_name(name, buffer.key()), buffer.value());
      }
    });
  }
  return result;
}

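The recurse flag these rewrites thread through mirrors the Python API, where
recurse=False likewise yields only a module's own (direct) parameters:

    import torch.nn as nn

    m = nn.Sequential(nn.Linear(2, 3), nn.ReLU())
    own = [name for name, _ in m.named_parameters(recurse=False)]   # []
    all_ = [name for name, _ in m.named_parameters()]               # ['0.weight', '0.bias']
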
@@ -261,7 +254,7 @@ void Module::zero_grad() {
   for (auto& child : children_) {
     child.value()->zero_grad();
   }
-  for (auto& parameter : parameters_) {
+  for (auto& parameter : named_parameters(/*recurse=*/false)) {
     auto& grad = parameter->grad();
     if (grad.defined()) {
      grad = grad.detach();
@@ -271,10 +264,10 @@ void Module::zero_grad() {
 }

 void Module::save(serialize::OutputArchive& archive) const {
-  for (const auto& parameter : parameters_) {
+  for (const auto& parameter : named_parameters(/*recurse=*/false)) {
     archive.write(parameter.key(), parameter.value());
   }
-  for (const auto& buffer : buffers_) {
+  for (const auto& buffer : named_buffers(/*recurse=*/false)) {
     archive.write(buffer.key(), buffer.value(), /*is_buffer=*/true);
   }
   for (const auto& child : children_) {
@@ -287,10 +280,10 @@ void Module::save(serialize::OutputArchive& archive) const {
 }

 void Module::load(serialize::InputArchive& archive) {
-  for (auto& parameter : parameters_) {
+  for (auto& parameter : named_parameters(/*recurse=*/false)) {
     archive.read(parameter.key(), parameter.value());
   }
-  for (auto& buffer : buffers_) {
+  for (auto& buffer : named_buffers(/*recurse=*/false)) {
     archive.read(buffer.key(), buffer.value(), /*is_buffer=*/true);
   }
   for (const auto& child : children_) {
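
The net effect of the module accessor changes above: parameters() and buffers() are now thin wrappers over their named counterparts, and undefined tensors registered on a module are filtered out instead of leaking to callers. A minimal sketch of the resulting behavior from user code, assuming libtorch; MaybeBiased is a hypothetical module used only for illustration:

#include <torch/torch.h>
#include <iostream>

// MaybeBiased registers one defined parameter and one undefined one --
// the case the filtering above is designed for.
struct MaybeBiased : torch::nn::Module {
  MaybeBiased() {
    weight = register_parameter("weight", torch::randn({4, 4}));
    // Undefined tensor; named_parameters() now skips it entirely.
    bias = register_parameter("bias", torch::Tensor(), /*requires_grad=*/false);
  }
  torch::Tensor weight, bias;
};

int main() {
  MaybeBiased m;
  for (const auto& p : m.named_parameters(/*recurse=*/false)) {
    std::cout << p.key() << '\n';  // prints only "weight"
  }
}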
@@ -19,8 +19,20 @@ namespace F = torch::nn::functional;
 namespace torch {
 namespace nn {
 Conv1dImpl::Conv1dImpl(
-    ConvOptions<1> options_)
-    : ConvImpl(options_.transposed(false).output_padding(0)) {}
+    Conv1dOptions options_)
+    : ConvNdImpl(
+        detail::ConvNdOptions<1>(
+          /*in_channels=*/options_.in_channels(),
+          /*out_channels=*/options_.out_channels(),
+          /*kernel_size=*/options_.kernel_size())
+          .stride(options_.stride())
+          .padding(options_.padding())
+          .dilation(options_.dilation())
+          .transposed(false)
+          .output_padding(0)
+          .groups(options_.groups())
+          .bias(options_.bias())
+          .padding_mode(options_.padding_mode())) {}

 Tensor Conv1dImpl::forward(const Tensor& input) {
   if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) {
@@ -44,8 +56,20 @@ Tensor Conv1dImpl::forward(const Tensor& input) {
 }

 Conv2dImpl::Conv2dImpl(
-    ConvOptions<2> options_)
-    : ConvImpl(options_.transposed(false).output_padding(0)) {}
+    Conv2dOptions options_)
+    : ConvNdImpl(
+        detail::ConvNdOptions<2>(
+          /*in_channels=*/options_.in_channels(),
+          /*out_channels=*/options_.out_channels(),
+          /*kernel_size=*/options_.kernel_size())
+          .stride(options_.stride())
+          .padding(options_.padding())
+          .dilation(options_.dilation())
+          .transposed(false)
+          .output_padding(0)
+          .groups(options_.groups())
+          .bias(options_.bias())
+          .padding_mode(options_.padding_mode())) {}

 Tensor Conv2dImpl::forward(const Tensor& input) {
   if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) {
@@ -71,8 +95,20 @@ Tensor Conv2dImpl::forward(const Tensor& input) {
 }

 Conv3dImpl::Conv3dImpl(
-    ConvOptions<3> options_)
-    : ConvImpl(options_.transposed(false).output_padding(0)) {}
+    Conv3dOptions options_)
+    : ConvNdImpl(
+        detail::ConvNdOptions<3>(
+          /*in_channels=*/options_.in_channels(),
+          /*out_channels=*/options_.out_channels(),
+          /*kernel_size=*/options_.kernel_size())
+          .stride(options_.stride())
+          .padding(options_.padding())
+          .dilation(options_.dilation())
+          .transposed(false)
+          .output_padding(0)
+          .groups(options_.groups())
+          .bias(options_.bias())
+          .padding_mode(options_.padding_mode())) {}

 Tensor Conv3dImpl::forward(const Tensor& input) {
   if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) {
@@ -98,14 +134,14 @@ Tensor Conv3dImpl::forward(const Tensor& input) {
     options.groups());
 }

-template class ConvImpl<1, Conv1dImpl>;
-template class ConvImpl<2, Conv2dImpl>;
-template class ConvImpl<3, Conv3dImpl>;
+template class ConvNdImpl<1, Conv1dImpl>;
+template class ConvNdImpl<2, Conv2dImpl>;
+template class ConvNdImpl<3, Conv3dImpl>;

 // ============================================================================

 template <size_t D, typename Derived>
-std::vector<int64_t> ConvTransposeImpl<D, Derived>::_output_padding(
+std::vector<int64_t> ConvTransposeNdImpl<D, Derived>::_output_padding(
     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size,
     const ExpandingArray<D>& stride, const ExpandingArray<D>& padding,
     const ExpandingArray<D>& kernel_size) {
@@ -151,7 +187,20 @@ std::vector<int64_t> ConvTransposeImpl<D, Derived>::_output_padding(
 }

 ConvTranspose1dImpl::ConvTranspose1dImpl(
-    ConvTransposeOptions<1> options_) : ConvTransposeImpl(options_.transposed(true)) {}
+    ConvTranspose1dOptions options_)
+    : ConvTransposeNdImpl(
+        detail::ConvNdOptions<1>(
+          /*in_channels=*/options_.in_channels(),
+          /*out_channels=*/options_.out_channels(),
+          /*kernel_size=*/options_.kernel_size())
+          .stride(options_.stride())
+          .padding(options_.padding())
+          .dilation(options_.dilation())
+          .transposed(true)
+          .output_padding(options_.output_padding())
+          .groups(options_.groups())
+          .bias(options_.bias())
+          .padding_mode(options_.padding_mode())) {}

 Tensor ConvTranspose1dImpl::forward(
     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) {
@@ -168,7 +217,19 @@ Tensor ConvTranspose1dImpl::forward(
 }

 ConvTranspose2dImpl::ConvTranspose2dImpl(
-    ConvTransposeOptions<2> options_) : ConvTransposeImpl(options_.transposed(true)) {}
+    ConvTranspose2dOptions options_)
+    : ConvTransposeNdImpl(detail::ConvNdOptions<2>(
+          /*in_channels=*/options_.in_channels(),
+          /*out_channels=*/options_.out_channels(),
+          /*kernel_size=*/options_.kernel_size())
+          .stride(options_.stride())
+          .padding(options_.padding())
+          .dilation(options_.dilation())
+          .transposed(true)
+          .output_padding(options_.output_padding())
+          .groups(options_.groups())
+          .bias(options_.bias())
+          .padding_mode(options_.padding_mode())) {}

 Tensor ConvTranspose2dImpl::forward(
     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) {
@@ -185,7 +246,19 @@ Tensor ConvTranspose2dImpl::forward(
 }

 ConvTranspose3dImpl::ConvTranspose3dImpl(
-    ConvTransposeOptions<3> options_) : ConvTransposeImpl(options_.transposed(true)) {}
+    ConvTranspose3dOptions options_)
+    : ConvTransposeNdImpl(detail::ConvNdOptions<3>(
+          /*in_channels=*/options_.in_channels(),
+          /*out_channels=*/options_.out_channels(),
+          /*kernel_size=*/options_.kernel_size())
+          .stride(options_.stride())
+          .padding(options_.padding())
+          .dilation(options_.dilation())
+          .transposed(true)
+          .output_padding(options_.output_padding())
+          .groups(options_.groups())
+          .bias(options_.bias())
+          .padding_mode(options_.padding_mode())) {}

 Tensor ConvTranspose3dImpl::forward(
     const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) {
@@ -201,9 +274,9 @@ Tensor ConvTranspose3dImpl::forward(
     output_padding, options.groups(), options.dilation());
 }

-template class ConvTransposeImpl<1, ConvTranspose1dImpl>;
-template class ConvTransposeImpl<2, ConvTranspose2dImpl>;
-template class ConvTransposeImpl<3, ConvTranspose3dImpl>;
+template class ConvTransposeNdImpl<1, ConvTranspose1dImpl>;
+template class ConvTransposeNdImpl<2, ConvTranspose2dImpl>;
+template class ConvTransposeNdImpl<3, ConvTranspose3dImpl>;

 } // namespace nn
 } // namespace torch
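
User-facing construction is unchanged by the ConvNdImpl refactor: each public per-dimension options type (Conv1dOptions, Conv2dOptions, ...) is translated into the internal detail::ConvNdOptions<D> by the constructors above. A minimal sketch, assuming libtorch:

#include <torch/torch.h>
#include <iostream>

int main() {
  // Per-dimension options type; the Conv2dImpl constructor above converts it
  // into the internal detail::ConvNdOptions<2>.
  torch::nn::Conv2d conv(torch::nn::Conv2dOptions(/*in_channels=*/3,
                                                  /*out_channels=*/16,
                                                  /*kernel_size=*/3)
                             .stride(2)
                             .padding(1)
                             .bias(false));
  auto y = conv(torch::randn({1, 3, 32, 32}));
  std::cout << y.sizes() << '\n';  // [1, 16, 16, 16]
}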
@@ -36,7 +36,8 @@ PyObject* rpc_init(PyObject* /* unused */) {

   auto rpcBackendOptions =
       shared_ptr_class_<RpcBackendOptions>(module, "RpcBackendOptions")
-          .def_readwrite("rpc_timeout", &RpcBackendOptions::rpcTimeout);
+          .def_readwrite("rpc_timeout", &RpcBackendOptions::rpcTimeout)
+          .def_readwrite("init_method", &RpcBackendOptions::initMethod);

   auto workerInfo =
       shared_ptr_class_<WorkerInfo>(
@@ -111,9 +112,9 @@ Otherwise, throws an exception.
                 return PyRRef::unpickle(t);
               }));

-  // future.wait() should not be called after wait_all_workers(), e.g.,
-  // pythonRpcHandler is cleaned up in wait_all_workers(); after
-  // wait_all_workers(), python objects returned from rpc python calls cannot be
+  // future.wait() should not be called after shutdown(), e.g.,
+  // pythonRpcHandler is cleaned up in shutdown(); after
+  // shutdown(), python objects returned from rpc python calls cannot be
   // resolved.
   auto futureMessage =
       shared_ptr_class_<FutureMessage>(module, "FutureMessage")
@@ -154,6 +155,10 @@ Otherwise, throws an exception.
           "join",
           &ProcessGroupAgent::join,
           py::call_guard<py::gil_scoped_release>())
+      .def(
+          "shutdown",
+          &ProcessGroupAgent::shutdown,
+          py::call_guard<py::gil_scoped_release>())
       .def(
           "sync",
           &ProcessGroupAgent::sync,
@@ -164,8 +169,8 @@ Otherwise, throws an exception.
     agent->start();
   });

-  module.def("_destroy_rref_context", []() {
-    RRefContext::getInstance().destroyInstance();
+  module.def("_destroy_rref_context", [](bool ignoreRRefLeak) {
+    RRefContext::getInstance().destroyInstance(ignoreRRefLeak);
   });

   module.def("_cleanup_python_rpc_handler", []() {
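
The rpc_init hunk above only chains one more def_readwrite onto the existing binding. For reference, a standalone pybind11 sketch of that pattern; the module name "demo" and the struct Options are hypothetical stand-ins used only for this illustration:

#include <pybind11/pybind11.h>
#include <pybind11/chrono.h>  // converter for std::chrono durations
#include <chrono>
#include <memory>
#include <string>

namespace py = pybind11;

// Options mirrors the two fields bound above.
struct Options {
  std::chrono::milliseconds rpcTimeout{0};
  std::string initMethod;
};

PYBIND11_MODULE(demo, m) {
  py::class_<Options, std::shared_ptr<Options>>(m, "Options")
      .def(py::init<>())
      .def_readwrite("rpc_timeout", &Options::rpcTimeout)
      .def_readwrite("init_method", &Options::initMethod);
}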
@@ -127,7 +127,6 @@ ProcessGroupAgent::ProcessGroupAgent(
           WorkerInfo(std::move(workerName), pg->getRank()),
           c10::guts::make_unique<RequestCallbackImpl>(),
           rpcTimeout),
-      shutdown_{false},
       pg_(std::move(pg)),
       sendCounts_(pg_->getSize()),
       recvCounts_(pg_->getSize()),
@@ -180,30 +179,12 @@ const WorkerInfo& ProcessGroupAgent::getWorkerInfo(worker_id_t id) const {
 }

 void ProcessGroupAgent::join() {
-  // Every process i sends a SHUTDOWN message to process i + 1. This is
-  // necessary for now because:
-  // 1. There is no abort API for ProcessGroup::recvAnysource yet. We have to
-  //    feed it a message or kill the thread.
-  // 2. A GLOO process cannot send a message to itself. (there is an ongoing
-  //    effort to fix this problem).
-  shutdown_.store(true);
   sync();
-  // This is needed in case no futures were created, otherwise the future
-  // timeout watchdog would sleep forever.
-
-  futureTimeoutCV_.notify_one();
   std::unique_lock<std::mutex> lock(futureMutex_);
   futureCV_.wait(
       lock, [this] { return futures_.empty() && futureTimeouts_.empty(); });
   lock.unlock();
   pg_->barrier()->wait();
-  int dst = (pg_->getRank() + 1) % pg_->getSize();
-  enqueueSend(
-      SendWork(allWorkerInfo_[dst], Message({}, {}, MessageType::SHUTDOWN)));
-  threadPool_.waitWorkComplete();
-  listenerThread_.join();
-  futureTimeoutThread_.join();
-  PythonRpcHandler::getInstance().cleanup();
 }

 bool ProcessGroupAgent::hasPendingMessage() {
@@ -269,14 +250,38 @@ void ProcessGroupAgent::sync() {
 }

 void ProcessGroupAgent::start() {
+  {
+    std::lock_guard<std::mutex> futureLock{futureMutex_};
+    rpcRunning_.store(true);
+  }
   listenerThread_ = std::thread(&ProcessGroupAgent::listenLoop, this);
   futureTimeoutThread_ =
       std::thread(&ProcessGroupAgent::pollTimedOutRPCs, this);
 }

+void ProcessGroupAgent::shutdown() {
+  LOG(INFO) << "Shutting down ProcessGroupAgent.";
+  std::unique_lock<std::mutex> lock{futureMutex_};
+  if (!rpcRunning_.exchange(false)) {
+    return;
+  }
+  lock.unlock();
+  futureTimeoutCV_.notify_one();
+  futureTimeoutThread_.join();
+  {
+    std::unique_lock<std::mutex> lock(recvWorkMutex_);
+    if (recvWork_) {
+      recvWork_->abort();
+    }
+  }
+  threadPool_.waitWorkComplete();
+  listenerThread_.join();
+}
+
 std::shared_ptr<FutureMessage> ProcessGroupAgent::send(
     const WorkerInfo& to,
     Message&& message) {
+  TORCH_CHECK(rpcRunning_.load(), "ProcessGroupAgent hasn't started.");
   TORCH_CHECK(
       to.id_ < (worker_id_t)pg_->getSize(),
       "Destination rank is out of bound, got ",
@@ -456,10 +461,19 @@ void ProcessGroupAgent::enqueueRecv(RecvWork work) {
 }

 void ProcessGroupAgent::listenLoop() {
-  while (true) {
+  while (rpcRunning_.load()) {
     // rank, tensor size, message type
     std::vector<torch::Tensor> preamble = {torch::empty({3}, {torch::kInt64})};
-    pg_->recvAnysource(preamble, pg_->getRank())->wait();
+    auto work = pg_->recvAnysource(preamble, pg_->getRank());
+    {
+      std::lock_guard<std::mutex> guard(recvWorkMutex_);
+      recvWork_ = work;
+    }
+
+    if (!rpcRunning_.load() || !work->wait() /* aborted */) {
+      return;
+    }
+
     int64_t* preamble_items = preamble.front().storage().data<int64_t>();

     auto srcRank = preamble_items[0];
@@ -483,9 +497,12 @@ void ProcessGroupAgent::listenLoop() {
 }

 void ProcessGroupAgent::pollTimedOutRPCs() {
-  while (!shutdown_.load()) {
-    std::chrono::milliseconds sleepTime;
+  while (true) {
+    std::unique_lock<std::mutex> lock{futureMutex_};
+    if (!rpcRunning_.load()) {
+      return;
+    }
+    std::chrono::milliseconds sleepTime;
     // Estimate amount of time the first future will time out in, and sleep
     // for that long.
     // if there are no futures or the first future's RPC timeout is set to 0
@@ -505,7 +522,7 @@ void ProcessGroupAgent::pollTimedOutRPCs() {
       futureTimeoutCV_.wait_for(lock, sleepTime);
     }

-    if (shutdown_.load()) {
+    if (!rpcRunning_.load()) {
       return;
     }
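
The shutdown logic above hinges on two things: an rpcRunning_ flag checked by every loop, and a stored handle to the pending blocking receive (recvWork_) so shutdown() can abort it from another thread, replacing the old scheme of sending a SHUTDOWN message to the next rank. A self-contained sketch of that pattern in plain C++11 (not the PyTorch API; Work stands in for c10d::ProcessGroup::Work):

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>

struct Work {  // stand-in for c10d::ProcessGroup::Work
  std::mutex m;
  std::condition_variable cv;
  bool done = false, aborted = false;  // done would be set on message arrival
  bool wait() {  // returns false if aborted, matching the check in listenLoop()
    std::unique_lock<std::mutex> lk(m);
    cv.wait(lk, [&] { return done || aborted; });
    return !aborted;
  }
  void abort() {
    { std::lock_guard<std::mutex> lk(m); aborted = true; }
    cv.notify_all();
  }
};

std::atomic<bool> rpcRunning{false};
std::mutex recvWorkMutex;
std::shared_ptr<Work> recvWork;

void listenLoop() {
  while (rpcRunning.load()) {
    auto work = std::make_shared<Work>();  // a blocking receive in PyTorch
    {
      std::lock_guard<std::mutex> guard(recvWorkMutex);
      recvWork = work;  // publish so shutdown() can abort it
    }
    if (!rpcRunning.load() || !work->wait() /* aborted */) {
      return;
    }
    // ... process the received message here ...
  }
}

int main() {
  rpcRunning.store(true);
  std::thread listener(listenLoop);
  // Give the listener time to park in wait(); demo only.
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  // shutdown(): flip the flag first, then abort the pending receive.
  rpcRunning.store(false);
  {
    std::lock_guard<std::mutex> lk(recvWorkMutex);
    if (recvWork) recvWork->abort();
  }
  listener.join();
  std::cout << "listener exited cleanly\n";
}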
@@ -14,7 +14,7 @@ namespace distributed {
 namespace rpc {

 struct ProcessGroupRpcBackendOptions : public RpcBackendOptions {
-  ProcessGroupRpcBackendOptions() noexcept = default;
+  ProcessGroupRpcBackendOptions() = default;
   int numSendRecvThreads;
 };

@@ -57,6 +57,8 @@ class ProcessGroupAgent : public RpcAgent {

   void start() override;

+  void shutdown() override;
+
  protected:
   // This method wraps the destination information and the message into a
   // SendWork object, and puts the SendWork into a queue. Another thread will
@@ -143,10 +145,6 @@ class ProcessGroupAgent : public RpcAgent {
     return ++nextId_;
   }

-  // atomic bool indicating if join() has been called and background threads
-  // should shutdown.
-  std::atomic_bool shutdown_;
-
   std::shared_ptr<c10d::ProcessGroup> pg_;
   // worker name -> rank
   std::unordered_map<std::string, int> nameMap_;
@@ -159,12 +157,23 @@ class ProcessGroupAgent : public RpcAgent {
   MessageCounter recvCounts_;

   std::atomic<int64_t> nextId_;
+  // atomic bool indicating if this agent is running. It is set in
+  // ProcessGroupAgent::start and unset in ProcessGroupAgent::shutdown and
+  // ProcessGroupAgent::join. It controls whether several background threads
+  // should be running.
+  // We lock access to this in shutdown() and pollTimedOutRPCs() to prevent
+  // race conditions when notifying condition variables.
+  std::atomic<bool> rpcRunning_{false};
   // one mutex per ProcessGroup rank, as ProcessGroup::send is not thread-safe
   // when using the same tag.
   std::vector<std::mutex> sendMutexes_;
   std::thread listenerThread_;
   // A thread to poll existing futures and check for timed out ones.
   std::thread futureTimeoutThread_;
+  // Lock and shared ptr to the currently pending work, set in listenLoop()
+  // and interruptible in shutdown().
+  std::mutex recvWorkMutex_;
+  std::shared_ptr<c10d::ProcessGroup::Work> recvWork_;
   // A threadPool that processes both SendWork and RecvWork. There are two
   // motivations for adding a ThreadPool:
   // (1) RPC serialization/deserialization and processing can be expensive,
@@ -13,8 +13,9 @@ namespace distributed {
 namespace rpc {

 struct RpcBackendOptions {
-  RpcBackendOptions() noexcept = default;
+  RpcBackendOptions() = default;
   std::chrono::milliseconds rpcTimeout;
+  std::string initMethod;
 };

 // A globally unique ID to identify an RpcAgent
@@ -124,7 +125,11 @@ class TORCH_API RpcAgent {
   virtual void sync() = 0;

   // start accepting requests
-  virtual void start() {}
+  virtual void start() = 0;
+
+  // Stop accepting requests and shut down the RPC framework as soon as
+  // possible by terminating all RPC threads.
+  virtual void shutdown() = 0;

   // Set the default rpc agent.
   static void setDefaultRpcAgent(std::shared_ptr<RpcAgent> defaultRpcAgent);
@@ -136,15 +136,16 @@ UserRRef<T>::UserRRef(

 template <typename T>
 UserRRef<T>::~UserRRef() {
-  // TODO: queue this in RRefContext instead of doing it here.
-  auto& ctx = RRefContext::getInstance();
-  if (ctx.getWorkerId() != ownerId_) {
-    auto fm = ctx.agent()->send(
-        ctx.agent()->getWorkerInfo(ownerId_),
-        RRefUserDelete(rrefId_, forkId_).toMessage());
-
-    fm->addCallback(
-        [](const Message& message) { RRefContext::handleException(message); });
+  try {
+    RRefContext::getInstance().delUser(ownerId_, rrefId_, forkId_);
+  } catch (const std::exception& ex) {
+    LOG(ERROR) << "Error occurred when deleting UserRRef instance, "
+               << "RRefId = " << rrefId_ << ", ForkId = " << forkId_ << " : "
+               << ex.what();
+  } catch (...) {
+    LOG(ERROR) << "Error occurred when deleting UserRRef instance, "
+               << "RRefId = " << rrefId_ << ", ForkId = " << forkId_ << " : "
+               << "unknown error";
   }
 }
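
The destructor rewrite above follows the usual rule that destructors must not let exceptions escape (that would call std::terminate), so the remote-delete call is wrapped in a catch-all that only logs. A generic sketch of the pattern; RemoteHandle and release() are illustrative stand-ins, not the PyTorch API:

#include <iostream>
#include <stdexcept>

struct RemoteHandle {
  // release() stands in for RRefContext::delUser(), which may throw,
  // e.g. if the RPC layer is already torn down.
  void release() { throw std::runtime_error("agent already shut down"); }
  ~RemoteHandle() {
    try {
      release();
    } catch (const std::exception& ex) {
      std::cerr << "Error releasing handle: " << ex.what() << '\n';
    } catch (...) {
      std::cerr << "Error releasing handle: unknown error\n";
    }
  }
};

int main() {
  RemoteHandle h;  // destructor logs the failure instead of terminating
}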
@@ -13,8 +13,13 @@ RRefContext& RRefContext::getInstance() {
   return *context;
 }

-void RRefContext::destroyInstance() {
-  RRefContext::getInstance().checkRRefLeaks();
+void RRefContext::destroyInstance(bool ignoreRRefLeak) {
+  auto& ctx = RRefContext::getInstance();
+  {
+    std::lock_guard<std::mutex> lock(ctx.destroyedMutex_);
+    ctx.destroyed_ = true;
+  }
+  ctx.checkRRefLeaks(ignoreRRefLeak);
 }

 void RRefContext::handleException(const Message& message) {
@@ -27,7 +32,7 @@ void RRefContext::handleException(const Message& message) {
 }

 RRefContext::RRefContext(std::shared_ptr<RpcAgent> agent)
-    : agent_(std::move(agent)) {}
+    : agent_(std::move(agent)), destroyed_(false) {}

 RRefContext::~RRefContext() {
   if (!owners_.empty()) {
@@ -36,7 +41,7 @@ RRefContext::~RRefContext() {
   }
 }

-void RRefContext::checkRRefLeaks() {
+void RRefContext::checkRRefLeaks(bool ignoreRRefLeak) {
   if (!forks_.empty()) {
     std::stringstream ss;
     for (auto& entry : forks_) {
@@ -46,7 +51,21 @@ void RRefContext::checkRRefLeaks() {
            << std::endl;
       }
     }
-    AT_ERROR(ss.str());
+
+    if (ignoreRRefLeak) {
+      LOG(WARNING)
+          << "Detected RRef Leaks during shutdown. This usually "
+          << "occurs when the application code still holds references to RRef "
+          << "instances when calling shutdown(). If the program has "
+          << "completed correctly and the process is exiting, it is OK to "
+          << "ignore these leaks. However, if your program will keep running "
+          << "after this, these leaks could result in memory leaks on RRef "
+          << "owners. Please make sure all RRefs are out of scope and Python "
+          << "GC has deleted them before calling shutdown(): \n"
+          << ss.str();
+    } else {
+      AT_ERROR(ss.str());
+    }
   }
 }

@@ -96,6 +115,21 @@ template std::shared_ptr<UserRRef<py::object>> RRefContext::createUserRRef<
     const RRefId& rrefId,
     const ForkId& forkId);

+void RRefContext::delUser(
+    const worker_id_t owner,
+    const RRefId& rrefId,
+    const ForkId& forkId) {
+  std::lock_guard<std::mutex> lock(destroyedMutex_);
+  if (!destroyed_) {
+    auto fm = agent_->send(
+        agent_->getWorkerInfo(owner),
+        RRefUserDelete(rrefId, forkId).toMessage());
+
+    fm->addCallback(
+        [](const Message& message) { RRefContext::handleException(message); });
+  }
+}
+
 template <typename T>
 std::shared_ptr<RRef> RRefContext::getOrCreateRRef(const RRefForkData& rfd) {
   auto& ownerId = rfd.ownerId_;
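
The destroyed_ flag guarded by destroyedMutex_ makes delUser() a no-op once destroyInstance() has run, so late UserRRef destructors cannot touch an agent that is already torn down. A reduced sketch of that guard with illustrative names (not the PyTorch API):

#include <iostream>
#include <mutex>

// Context is a reduced stand-in for RRefContext: after destroyInstance(),
// delUser() must silently drop the delete message instead of using the agent.
class Context {
 public:
  void destroyInstance() {
    std::lock_guard<std::mutex> lock(destroyedMutex_);
    destroyed_ = true;
  }
  void delUser(int forkId) {
    std::lock_guard<std::mutex> lock(destroyedMutex_);
    if (!destroyed_) {
      std::cout << "send RRefUserDelete for fork " << forkId << '\n';
    }
  }

 private:
  std::mutex destroyedMutex_;
  bool destroyed_ = false;
};

int main() {
  Context ctx;
  ctx.delUser(1);         // message sent
  ctx.destroyInstance();  // the shutdown() path
  ctx.delUser(2);         // silently dropped
}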
@@ -16,7 +16,7 @@ namespace rpc {
 class RRefContext {
  public:
   static RRefContext& getInstance();
-  static void destroyInstance();
+  static void destroyInstance(bool ignoreRRefLeak = true);

   static void handleException(const Message& message);

@@ -111,6 +111,11 @@ class RRefContext {
   void addPendingUser(const ForkId& forkId, const std::shared_ptr<RRef>& rref);
   void delPendingUser(const ForkId& forkId);

+  void delUser(
+      const worker_id_t owner,
+      const RRefId& rrefId,
+      const ForkId& forkId);
+
  private:
   RRefContext(std::shared_ptr<RpcAgent>);

@@ -123,7 +128,7 @@ class RRefContext {
   void finishForkRequest(const ForkId& forkId, worker_id_t parent);

   // If there is any leak on any RRef, this method will throw an error.
-  void checkRRefLeaks();
+  void checkRRefLeaks(bool ignoreRRefLeak);

   static std::atomic<local_id_t> nextLocalId_;

@@ -157,6 +162,9 @@ class RRefContext {
   //     owner learns about the forked child.
   std::unordered_map<ForkId, std::shared_ptr<RRef>, ForkId::Hash>
       pendingChildren_;
+
+  std::mutex destroyedMutex_;
+  bool destroyed_;
 };

 } // namespace rpc
@@ -751,5 +751,33 @@ std::tuple<std::string, RawDataExportMap> export_onnx(
       graph_encoder.get_raw_data_export_map());
 }

+namespace {
+void export_opnames(const script::Module& m, std::set<std::string>& opnames) {
+  for (const auto& method : m.get_methods()) {
+    const auto& func = method.function();
+    for (const auto& node : func.graph()->nodes()) {
+      auto op = findOperatorFor(node);
+      if (op) {
+        auto opname = node->schema().operator_name();
+        std::string namestr = opname.name;
+        if (!opname.overload_name.empty()) {
+          namestr += "." + opname.overload_name;
+        }
+        opnames.emplace(namestr);
+      }
+    }
+  }
+  for (const auto& sub_m : m.children()) {
+    export_opnames(sub_m, opnames);
+  }
+}
+} // namespace
+
+std::vector<std::string> export_opnames(const script::Module& m) {
+  std::set<std::string> names;
+  export_opnames(m, names);
+  return std::vector<std::string>(names.begin(), names.end());
+}
+
 } // namespace jit
 } // namespace torch
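
export_opnames walks every method graph in the module and its submodules and collects the schema name (plus overload suffix, if any) of each resolvable operator, returning a sorted, deduplicated list via the intermediate std::set. A minimal sketch of calling it, assuming libtorch and that the declaration below is on the include path; "model.pt" is a placeholder path for any serialized TorchScript module:

#include <torch/script.h>
#include <iostream>

int main() {
  torch::jit::script::Module module = torch::jit::load("model.pt");
  for (const auto& name : torch::jit::export_opnames(module)) {
    std::cout << name << '\n';  // e.g. "aten::conv2d"
  }
}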
@@ -65,5 +65,8 @@ using ExportModuleExtraFilesHook =
     std::function<script::ExtraFilesMap(const script::Module&)>;
 TORCH_API void SetExportModuleExtraFilesHook(ExportModuleExtraFilesHook hook);

+// Returns a list of names of all operators in the module and its submodules.
+TORCH_API std::vector<std::string> export_opnames(const script::Module& m);
+
 } // namespace jit
 } // namespace torch
@@ -23,8 +23,8 @@ namespace jit {
 static std::atomic<bool> executor_mode{false};
 static std::atomic<bool> profiling_mode{false};
 #else
-static std::atomic<bool> executor_mode{true};
-static std::atomic<bool> profiling_mode{true};
+static std::atomic<bool> executor_mode{false};
+static std::atomic<bool> profiling_mode{false};
 #endif
Some files were not shown because too many files have changed in this diff.