Mirror of https://github.com/pytorch/pytorch.git
Compare commits: ffast_math ... v1.4.1 (59 commits)
Commits in this comparison (SHA1):
74044638f7, 7f73f1d591, ac15471de4, 49364eb426, bcf2d65446, f7a33f1eef,
bd584d52df, c697af4667, 0f3f4ec64c, 509df600bb, 187101a88e, e011d4a16e,
8ada95e950, 21c2481dfe, 398e8ba182, 074b30cdcb, 319bd5d431, 5a20bbd377,
fa59a9e190, 143868c3df, 964929fcc2, cd20ecb472, 19d4fd4910, a7d187baa4,
0541546ac5, 369ab73efd, 9f558e1ee6, f0ddfff200, 2de184b5a9, e0eeddfc78,
7727b57d08, 9e7dc37f90, 227017059f, aeeccc1486, 0b91246cbd, 0856d6f53c,
336e0d2874, 3b36f2068d, 6207945564, aecae514ab, 27a2ecb0a5, e36fd7b0ba,
799cb646a6, f60c63155a, 954d9ea466, 71185fb2a0, a06f26560c, e4cec279c6,
b8b50aa909, db686de13f, 288e463693, 73783d1048, 8891d4eeb1, 2085a6f329,
3eda9e7da2, fb8aa0e98c, c79b79dadd, 21acca4528, f710757557
@@ -36,6 +36,8 @@ class Conf(object):
 # The cpu nightlies are built on the pytorch/manylinux-cuda100 docker image
 alt_docker_suffix = self.cuda_version or "100"
 docker_distro_suffix = "" if self.pydistro == "conda" else alt_docker_suffix
+if self.cuda_version == "101":
+    return "soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916"
 return miniutils.quote("pytorch/" + docker_distro_prefix + "-cuda" + docker_distro_suffix)

 def get_name_prefix(self):
@@ -24,11 +24,11 @@ CONFIG_TREE_DATA = [
 ("5", [
     XImportant("3.6"), # This is actually the ASAN build
 ]),
-("7", [
-    ("3.6", [
-        ("xla", [XImportant(True)]),
-    ]),
-]),
+# ("7", [
+#     ("3.6", [
+#         ("xla", [XImportant(True)]),
+#     ]),
+# ]),
 ]),
 ("cuda", [
 ("9", [
@@ -210,6 +210,7 @@ def instantiate_configs():
 android_abi = fc.find_prop("android_abi")
 parms_list_ignored_for_docker_image.append(android_abi)
 restrict_phases = ["build"]
+fc.props["is_important"] = True

 elif compiler_name:
 gcc_version = compiler_name + (fc.find_prop("compiler_version") or "")
@@ -307,27 +307,28 @@ jobs:
 time docker pull ${DOCKER_IMAGE} >/dev/null
 export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})

-# TODO We may want to move the rebase logic to a separate step after checkout
-# Rebase to master only if in xenial_py3_6_gcc5_4 case
-if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
-echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
-set -x
-git config --global user.email "circleci.ossci@gmail.com"
-git config --global user.name "CircleCI"
-git config remote.origin.url https://github.com/pytorch/pytorch.git
-git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
-git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
-export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
-echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
-export GIT_COMMIT=${CIRCLE_SHA1}
-echo "GIT_COMMIT: " ${GIT_COMMIT}
-git checkout -f ${GIT_COMMIT}
-git reset --hard ${GIT_COMMIT}
-git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
-set +x
-else
-echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
-fi
+# NB: Temporarily disable the rebase logic in v1.4.0, don't merge this change into master
+# # TODO We may want to move the rebase logic to a separate step after checkout
+# # Rebase to master only if in xenial_py3_6_gcc5_4 case
+# if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
+# echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
+# set -x
+# git config --global user.email "circleci.ossci@gmail.com"
+# git config --global user.name "CircleCI"
+# git config remote.origin.url https://github.com/pytorch/pytorch.git
+# git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
+# git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
+# export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
+# echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
+# export GIT_COMMIT=${CIRCLE_SHA1}
+# echo "GIT_COMMIT: " ${GIT_COMMIT}
+# git checkout -f ${GIT_COMMIT}
+# git reset --hard ${GIT_COMMIT}
+# git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
+# set +x
+# else
+# echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+# fi

 git submodule sync && git submodule update -q --init --recursive
@@ -1709,20 +1710,6 @@ workflows:
 build_environment: "pytorch-linux-xenial-py3-clang5-asan-test"
 docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-asan:405"
 resource_class: large
-- pytorch_linux_build:
-name: pytorch_xla_linux_xenial_py3_6_clang7_build
-requires:
-- setup
-build_environment: "pytorch-xla-linux-xenial-py3.6-clang7-build"
-docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-clang7:405"
-- pytorch_linux_test:
-name: pytorch_xla_linux_xenial_py3_6_clang7_test
-requires:
-- setup
-- pytorch_xla_linux_xenial_py3_6_clang7_build
-build_environment: "pytorch-xla-linux-xenial-py3.6-clang7-test"
-docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3.6-clang7:405"
-resource_class: large
 - pytorch_linux_build:
 name: pytorch_linux_xenial_cuda9_cudnn7_py3_build
 requires:
@@ -1874,33 +1861,18 @@ workflows:
 name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_64_build
 requires:
 - setup
-filters:
-branches:
-only:
-- master
-- /ci-all\/.*/
 build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_64-build"
 docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405"
 - pytorch_linux_build:
 name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v7a_build
 requires:
 - setup
-filters:
-branches:
-only:
-- master
-- /ci-all\/.*/
 build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v7a-build"
 docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405"
 - pytorch_linux_build:
 name: pytorch_linux_xenial_py3_clang5_android_ndk_r19c_arm_v8a_build
 requires:
 - setup
-filters:
-branches:
-only:
-- master
-- /ci-all\/.*/
 build_environment: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c-arm-v8a-build"
 docker_image: "308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-xenial-py3-clang5-android-ndk-r19c:405"
 # Warning: indentation here matters!
@@ -2292,7 +2264,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2305,7 +2277,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2318,7 +2290,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2331,7 +2303,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2344,7 +2316,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2505,7 +2477,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2518,7 +2490,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2531,7 +2503,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2544,7 +2516,7 @@ workflows:
 filters:
 branches:
 only: postnightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2718,7 +2690,7 @@ workflows:
 branches:
 only: postnightly
 libtorch_variant: "shared-with-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2732,7 +2704,7 @@ workflows:
 branches:
 only: postnightly
 libtorch_variant: "shared-without-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2746,7 +2718,7 @@ workflows:
 branches:
 only: postnightly
 libtorch_variant: "static-with-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -2760,7 +2732,7 @@ workflows:
 branches:
 only: postnightly
 libtorch_variant: "static-without-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - smoke_linux_test:
@@ -3212,7 +3184,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_manywheel_2_7mu_cu101_devtoolset7_nightly_build
 build_environment: "manywheel 2.7mu cu101 devtoolset7"
@@ -3221,7 +3193,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_manywheel_3_5m_cu101_devtoolset7_nightly_build
 build_environment: "manywheel 3.5m cu101 devtoolset7"
@@ -3230,7 +3202,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_manywheel_3_6m_cu101_devtoolset7_nightly_build
 build_environment: "manywheel 3.6m cu101 devtoolset7"
@@ -3239,7 +3211,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_manywheel_3_7m_cu101_devtoolset7_nightly_build
 build_environment: "manywheel 3.7m cu101 devtoolset7"
@@ -3248,7 +3220,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_conda_2_7_cpu_devtoolset7_nightly_build
 build_environment: "conda 2.7 cpu devtoolset7"
@@ -3365,7 +3337,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_conda_3_5_cu101_devtoolset7_nightly_build
 build_environment: "conda 3.5 cu101 devtoolset7"
@@ -3374,7 +3346,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_conda_3_6_cu101_devtoolset7_nightly_build
 build_environment: "conda 3.6 cu101 devtoolset7"
@@ -3383,7 +3355,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_conda_3_7_cu101_devtoolset7_nightly_build
 build_environment: "conda 3.7 cu101 devtoolset7"
@@ -3392,7 +3364,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_libtorch_2_7m_cpu_devtoolset7_nightly_shared-with-deps_build
 build_environment: "libtorch 2.7m cpu devtoolset7"
@@ -3522,7 +3494,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "shared-with-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_shared-without-deps_build
 build_environment: "libtorch 2.7m cu101 devtoolset7"
@@ -3532,7 +3504,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "shared-without-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_static-with-deps_build
 build_environment: "libtorch 2.7m cu101 devtoolset7"
@@ -3542,7 +3514,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "static-with-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_libtorch_2_7m_cu101_devtoolset7_nightly_static-without-deps_build
 build_environment: "libtorch 2.7m cu101 devtoolset7"
@@ -3552,7 +3524,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "static-without-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 - binary_linux_build:
 name: binary_linux_libtorch_2_7m_cpu_gcc5_4_cxx11-abi_nightly_shared-with-deps_build
 build_environment: "libtorch 2.7m cpu gcc5.4_cxx11-abi"
@@ -4056,7 +4028,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4068,7 +4040,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4080,7 +4052,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4092,7 +4064,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4104,7 +4076,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4252,7 +4224,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4264,7 +4236,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4276,7 +4248,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4288,7 +4260,7 @@ workflows:
 filters:
 branches:
 only: nightly
-docker_image: "pytorch/conda-cuda"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4449,7 +4421,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "shared-with-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4462,7 +4434,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "shared-without-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4475,7 +4447,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "static-with-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@@ -4488,7 +4460,7 @@ workflows:
 branches:
 only: nightly
 libtorch_variant: "static-without-deps"
-docker_image: "pytorch/manylinux-cuda101"
+docker_image: soumith/manylinux-cuda101@sha256:5d62be90d5b7777121180e6137c7eed73d37aaf9f669c51b783611e37e0b4916
 use_cuda_docker_runtime: "1"
 resource_class: gpu.medium
 - binary_linux_test:
@ -11,6 +11,8 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
|
||||
source activate testenv >/dev/null
|
||||
elif [[ "$DESIRED_PYTHON" == 2.7mu ]]; then
|
||||
export PATH="/opt/python/cp27-cp27mu/bin:\$PATH"
|
||||
elif [[ "$DESIRED_PYTHON" == 3.8m ]]; then
|
||||
export PATH="/opt/python/cp38-cp38/bin:\$PATH"
|
||||
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
|
||||
python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"
|
||||
export PATH="/opt/python/cp\$python_nodot-cp\${python_nodot}m/bin:\$PATH"
|
||||
|
@@ -53,8 +53,10 @@ default_set = set([
 'pytorch-macos-10.13-cuda9.2-cudnn7-py3',
 # PyTorch Android
 'pytorch-linux-xenial-py3-clang5-android-ndk-r19c-x86_32-build',
 'pytorch-linux-xenial-py3-clang5-android-ndk-r19',
+# PyTorch Android gradle
+'pytorch-linux-xenial-py3-clang5-android-ndk-r19c-gradle-build-only-x86_32',

 # Pytorch iOS builds
 'pytorch-ios-11.2.1-x86_64_build',
 'pytorch-ios-11.2.1-arm64_build',
@@ -19,27 +19,28 @@ jobs:
 time docker pull ${DOCKER_IMAGE} >/dev/null
 export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})

-# TODO We may want to move the rebase logic to a separate step after checkout
-# Rebase to master only if in xenial_py3_6_gcc5_4 case
-if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
-echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
-set -x
-git config --global user.email "circleci.ossci@gmail.com"
-git config --global user.name "CircleCI"
-git config remote.origin.url https://github.com/pytorch/pytorch.git
-git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
-git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
-export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
-echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
-export GIT_COMMIT=${CIRCLE_SHA1}
-echo "GIT_COMMIT: " ${GIT_COMMIT}
-git checkout -f ${GIT_COMMIT}
-git reset --hard ${GIT_COMMIT}
-git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
-set +x
-else
-echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
-fi
+# NB: Temporarily disable the rebase logic in v1.4.0, don't merge this change into master
+# # TODO We may want to move the rebase logic to a separate step after checkout
+# # Rebase to master only if in xenial_py3_6_gcc5_4 case
+# if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
+# echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
+# set -x
+# git config --global user.email "circleci.ossci@gmail.com"
+# git config --global user.name "CircleCI"
+# git config remote.origin.url https://github.com/pytorch/pytorch.git
+# git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
+# git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
+# export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
+# echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
+# export GIT_COMMIT=${CIRCLE_SHA1}
+# echo "GIT_COMMIT: " ${GIT_COMMIT}
+# git checkout -f ${GIT_COMMIT}
+# git reset --hard ${GIT_COMMIT}
+# git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
+# set +x
+# else
+# echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
+# fi

 git submodule sync && git submodule update -q --init --recursive
.github/workflows/lint.yml (8 changes)
@@ -16,7 +16,7 @@ jobs:
 python-version: 3.x
 architecture: x64
 - name: Checkout PyTorch
-uses: actions/checkout@master
+uses: actions/checkout@v1
 - name: Ensure consistent CircleCI YAML config
 run: |
 pip install -r requirements.txt
@@ -51,7 +51,7 @@ jobs:
 python-version: 3.x
 architecture: x64
 - name: Fetch PyTorch
-uses: actions/checkout@master
+uses: actions/checkout@v1
 - name: Checkout PR tip
 run: |
 set -eux
@@ -87,7 +87,7 @@ jobs:
 python-version: 2.x
 architecture: x64
 - name: Fetch PyTorch
-uses: actions/checkout@master
+uses: actions/checkout@v1
 - name: Checkout PR tip
 run: |
 set -eux
@@ -126,7 +126,7 @@ jobs:
 python-version: 3.x
 architecture: x64
 - name: Checkout PyTorch
-uses: actions/checkout@master
+uses: actions/checkout@v1
 - name: Checkout PR tip
 run: |
 set -eux
@ -64,7 +64,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
|
||||
# if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
|
||||
# Hotfix, use hypothesis 3.44.6 on Ubuntu 14.04
|
||||
# See comments on
|
||||
# https://github.com/HypothesisWorks/hypothesis-python/commit/eadd62e467d6cee6216e71b391951ec25b4f5830
|
||||
@ -74,9 +74,9 @@ if [[ "$BUILD_ENVIRONMENT" == *ubuntu14.04* ]]; then
|
||||
sudo pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
|
||||
sudo pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
|
||||
sudo pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
|
||||
else
|
||||
pip install --user --no-cache-dir hypothesis==3.59.0
|
||||
fi
|
||||
# else
|
||||
# pip install --user --no-cache-dir hypothesis==3.59.0
|
||||
# fi
|
||||
|
||||
# Collect additional tests to run (outside caffe2/python)
|
||||
EXTRA_TESTS=()
|
||||
@ -133,7 +133,7 @@ pip install --user pytest-sugar
|
||||
# torchvision tests #
|
||||
#####################
|
||||
if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
|
||||
pip install -q --user git+https://github.com/pytorch/vision.git
|
||||
pip install -q --user git+https://github.com/pytorch/vision.git@v0.5.0
|
||||
pip install -q --user ninja
|
||||
# JIT C++ extensions require ninja, so put it into PATH.
|
||||
export PATH="/var/lib/jenkins/.local/bin:$PATH"
|
||||
@ -141,7 +141,7 @@ if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then
|
||||
# default pip version is too old(9.0.2), unable to support tag `manylinux2010`.
|
||||
# Fix the pip error: Couldn't find a version that satisfies the requirement
|
||||
sudo pip install --upgrade pip
|
||||
pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.0.0.dev1104
|
||||
pip install -q --user -i https://test.pypi.org/simple/ ort-nightly==1.1.0.dev1228
|
||||
fi
|
||||
"$ROOT_DIR/scripts/onnx/test.sh"
|
||||
fi
|
||||
|
@ -49,7 +49,7 @@ if [[ "$BUILD_ENVIRONMENT" != *ppc64le* ]]; then
|
||||
export PATH="/var/lib/jenkins/.local/bin:$PATH"
|
||||
|
||||
# TODO: move this to Docker
|
||||
pip_install --user hypothesis
|
||||
pip_install --user "hypothesis==4.53.2"
|
||||
|
||||
# TODO: move this to Docker
|
||||
PYTHON_VERSION=$(python -c 'import platform; print(platform.python_version())'|cut -c1)
|
||||
@ -214,7 +214,7 @@ test_backward_compatibility() {
|
||||
pushd test/backward_compatibility
|
||||
python dump_all_function_schemas.py --filename new_schemas.txt
|
||||
pip_uninstall torch
|
||||
pip_install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
|
||||
pip_install torch==1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
python check_backward_compatibility.py --new-schemas new_schemas.txt
|
||||
popd
|
||||
set +x
|
||||
|
@@ -22,7 +22,7 @@ if NOT "%BUILD_ENVIRONMENT%"=="" (
 :: Numba is pinned to 0.44.0 to avoid https://github.com/numba/numba/issues/4352
 call conda install -y -q python=3.6.7 numpy mkl cffi pyyaml boto3 protobuf numba==0.44.0
 )
-pip install -q ninja future hypothesis "librosa>=0.6.2" psutil pillow
+pip install -q ninja future "hypothesis==4.53.2" "librosa>=0.6.2" psutil pillow
 :: No need to install faulthandler since we only test Python >= 3.6 on Windows
 :: faulthandler is builtin since Python 3.3
@@ -413,7 +413,7 @@ public abstract class Tensor {
  */
 public long[] getDataAsLongArray() {
 throw new IllegalStateException(
-"Tensor of type " + getClass().getSimpleName() + " cannot return data as float array.");
+"Tensor of type " + getClass().getSimpleName() + " cannot return data as long array.");
 }

 /**
@@ -16,7 +16,7 @@
 // merge the libraries inside Facebook". Well, the problem is that there
 // are some downstream applications which are at binary size limit, and
 // incorporating all of the extra code from libtorch would push them
-// over (admarket/adreview/service:adreviewservice, see also
+// over (admarket/adreview/service:adreviewservice, see also
 // https://github.com/pytorch/pytorch/pull/29299) So if you want to do that,
 // we have to fix all of the services like this.
 //
@@ -50,10 +50,10 @@ struct CAFFE2_API VariableHooksInterface {
 virtual const std::string& name(const Tensor&) const = 0;
 };

-C10_API void SetVariableHooks(VariableHooksInterface* hooks);
-C10_API VariableHooksInterface* GetVariableHooks();
+CAFFE2_API void SetVariableHooks(VariableHooksInterface* hooks);
+CAFFE2_API VariableHooksInterface* GetVariableHooks();

-struct C10_API VariableHooksRegisterer {
+struct CAFFE2_API VariableHooksRegisterer {
 explicit VariableHooksRegisterer(VariableHooksInterface* hooks) {
 SetVariableHooks(hooks);
 }
@@ -41,7 +41,7 @@ Tensor cosine_embedding_loss(const Tensor& input1, const Tensor& input2, const T
 auto denom = (mag_square1 * mag_square2).sqrt_();
 auto cos = prod_sum / denom;

-auto zeros = at::zeros_like(target, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
+auto zeros = at::zeros_like(cos, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
 auto pos = 1 - cos;
 auto neg = (cos - margin).clamp_min_(0);
 auto output_pos = at::where(target == 1, pos, zeros);
@@ -77,8 +77,8 @@ Tensor margin_ranking_loss(const Tensor& input1, const Tensor& input2, const Ten
 }

 Tensor kl_div(const Tensor& input, const Tensor& target, int64_t reduction) {
-auto zeros = at::zeros_like(target, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
 auto output_pos = target * (at::log(target) - input);
+auto zeros = at::zeros_like(output_pos, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
 auto output = at::where(target > 0, output_pos, zeros);
 return apply_loss_reduction(output, reduction);
 }
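Editorial note on the two zeros_like changes above: at::where needs both value operands to agree in dtype (the new TORCH_CHECK added to _s_where_cpu further below enforces exactly that), so the zeros tensor has to be built from the computed expression, whose dtype may have been promoted, rather than from target. A minimal sketch of the corrected pattern, illustrative only and not the PyTorch source:

// Sketch: zeros must match the dtype/shape of the tensor it is paired
// with inside at::where.
#include <ATen/ATen.h>

at::Tensor kl_div_sketch(const at::Tensor& input, const at::Tensor& target) {
  auto output_pos = target * (at::log(target) - input);  // dtype may be promoted
  auto zeros = at::zeros_like(output_pos);               // match the promoted result
  return at::where(target > 0, output_pos, zeros);
}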
@@ -324,13 +324,14 @@ Tensor unflatten(const Tensor& self, int64_t dim, IntArrayRef sizes, DimnameList
 "up to the size of dim ", dim, " (", self.names()[dim], ": ", self.size(dim),
 ") in Tensor", self.names());

+int64_t dim_wrap = maybe_wrap_dim(dim, self.dim());
 auto outnames = self.names().vec();
-outnames.erase(outnames.begin() + dim);
-outnames.insert(outnames.begin() + dim, names.begin(), names.end());
+outnames.erase(outnames.begin() + dim_wrap);
+outnames.insert(outnames.begin() + dim_wrap, names.begin(), names.end());

 auto new_sizes = self.sizes().vec();
-new_sizes.erase(new_sizes.begin() + dim);
-new_sizes.insert(new_sizes.begin() + dim, sizes.begin(), sizes.end());
+new_sizes.erase(new_sizes.begin() + dim_wrap);
+new_sizes.insert(new_sizes.begin() + dim_wrap, sizes.begin(), sizes.end());

 Tensor result;
 {
@@ -138,5 +138,14 @@ Tensor max_pool3d(
 self, kernel_size, stride, padding, dilation, ceil_mode);
 return std::get<0>(output_and_indices);
 }

+Tensor _test_optional_float(const Tensor & self, c10::optional<double> scale) {
+  if (scale.has_value()) {
+    return at::full({}, scale.value(), self.options());
+  } else {
+    return at::empty({0}, self.options());
+  }
+}
+
 } // namespace native
 } // namespace at
@@ -134,6 +134,7 @@ std::vector<Tensor> where(const Tensor& condition) {
 }

 Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& other) {
+TORCH_CHECK(self.dtype() == other.dtype(), "expected scalar type ", self.dtype(), " but found ", other.dtype());
 Tensor ret = at::empty(self.sizes(), self.options());
 AT_DISPATCH_ALL_TYPES_AND_COMPLEX(ret.scalar_type(), "where_cpu", [&] {
 where_cpu<scalar_t>(ret, condition, self, other);
@@ -42,7 +42,22 @@ static void copy_kernel(TensorIterator& iter, bool non_blocking) {
 AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, dtype, "copy_", [&] {
 using dest_t = scalar_t;
 AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(ScalarType::Half, ScalarType::Bool, ScalarType::BFloat16, iter.dtype(1), "copy_", [&] {
-cpu_kernel(iter, c10::static_cast_with_inter_type<dest_t, scalar_t>);
+// Note (@zasdfgbnm):
+//
+// The code below can not be simplified as
+//   cpu_kernel(iter, c10::static_cast_with_inter_type<dest_t, scalar_t>::apply);
+//
+// because this would force the compiler to instantiate the inline function and generate a function call in the loop
+// instead of inlining it, making all the optimizations like vectorization impossible.
+// You can verify this by looking the the symbols of `libtorch_cpu.so`:
+//
+//   readelf -Ws libtorch_cpu.so | grep static_cast_with_inter_type
+//
+// If done correctly, the above command should have no output.
+//
+// See: https://github.com/pytorch/pytorch/issues/31271
+cpu_kernel(iter, [](scalar_t src) -> dest_t {
+  return c10::static_cast_with_inter_type<dest_t, scalar_t>(src); });
 });
 });
 }
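A standalone sketch of the inlining pitfall described in the note above (names are illustrative, not the ATen internals): naming a function template passes a function pointer into the loop, which tends to block inlining and vectorization, while a lambda keeps the cast inlinable.

// Hypothetical reduction of the cpu_kernel situation.
template <typename To, typename From>
inline To cast_sketch(From src) { return static_cast<To>(src); }

template <typename F>
void loop_sketch(float* dst, const int* src, int n, F f) {
  for (int i = 0; i < n; ++i) {
    dst[i] = f(src[i]);  // with a function-pointer F, this is an indirect call
  }
}

void run_sketch(float* dst, const int* src, int n) {
  // loop_sketch(dst, src, n, cast_sketch<float, int>);  // F deduced as a pointer
  loop_sketch(dst, src, n, [](int s) { return cast_sketch<float, int>(s); });
}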
@@ -171,8 +171,9 @@ void avg_pool2d_out_cuda_template(

 output.resize_({nbatch, nInputPlane, outputHeight, outputWidth});

-const int count = safe_downcast<int, int64_t>(output.numel());
-const int num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+const int32_t count = safe_downcast<int32_t, int64_t>(output.numel());
+const uint32_t num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+const uint32_t num_blocks = cuda::ATenCeilDiv<uint32_t>(count, num_threads);

 if (divisor_override.has_value()) {
 AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(),
@@ -184,7 +185,7 @@ void avg_pool2d_out_cuda_template(
 scalar_t *input_data = input.data_ptr<scalar_t>();

 avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, false, true>
-<<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+<<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
 count,
 input_data,
 nbatch,
@@ -209,7 +210,7 @@ void avg_pool2d_out_cuda_template(
 scalar_t *input_data = input.data_ptr<scalar_t>();

 avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, true, false>
-<<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+<<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
 count,
 input_data,
 nbatch,
@@ -233,7 +234,7 @@ void avg_pool2d_out_cuda_template(
 scalar_t *input_data = input.data_ptr<scalar_t>();

 avg_pool2d_out_cuda_frame<scalar_t, accscalar_t, false, false>
-<<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+<<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
 count,
 input_data,
 nbatch,
@@ -249,10 +250,8 @@ void avg_pool2d_out_cuda_template(
 }
 }


-TORCH_CHECK(cudaGetLastError() == cudaSuccess,
-"avg_pool2d_out_cuda_frame failed with error code ",
-cudaGetLastError());
+THCudaCheck(cudaGetLastError());

 if (input.ndimension() == 3) {
 output.resize_({nInputPlane, outputHeight, outputWidth});
@@ -322,8 +321,9 @@ Tensor& avg_pool2d_backward_out_cuda_template(

 gradInput.resize_as_(input);

-const int count = safe_downcast<int, int64_t>(input.numel());
-const int num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+const int32_t count = safe_downcast<int32_t, int64_t>(input.numel());
+const uint32_t num_threads = std::min(at::cuda::getCurrentDeviceProperties()->maxThreadsPerBlock, 1024);
+const uint32_t num_blocks = cuda::ATenCeilDiv<uint32_t>(count, num_threads);

 if (divisor_override.has_value()) {
 AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(),
@@ -335,7 +335,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
 scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>();

 avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, false, true>
-<<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+<<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
 count,
 gradOutput_data,
 nbatch,
@@ -360,7 +360,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
 scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>();

 avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, true, false>
-<<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+<<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
 count,
 gradOutput_data,
 nbatch,
@@ -384,7 +384,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
 scalar_t *gradInput_data = gradInput.data_ptr<scalar_t>();

 avg_pool2d_backward_out_cuda_frame<scalar_t, accscalar_t, false, false>
-<<<cuda::ATenCeilDiv(count, num_threads), num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
+<<<num_blocks, num_threads, 0, at::cuda::getCurrentCUDAStream()>>>(
 count,
 gradOutput_data,
 nbatch,
@@ -400,9 +400,7 @@ Tensor& avg_pool2d_backward_out_cuda_template(
 }
 }

-TORCH_CHECK(cudaGetLastError() == cudaSuccess,
-"avg_pool2d_backward_out_cuda failed with error code ",
-cudaGetLastError());
+THCudaCheck(cudaGetLastError());

 return gradInput;
 }
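The recurring launch-configuration change above just hoists the grid-size computation: the block count is computed once as an unsigned ceil-division instead of re-evaluating cuda::ATenCeilDiv at every <<<...>>> site. A sketch of the assumed arithmetic (ATenCeilDiv is taken to be the standard ceil-division helper):

#include <cstdint>

inline uint32_t ceil_div_sketch(uint32_t a, uint32_t b) {
  return (a + b - 1) / b;  // e.g. count = 1000, num_threads = 256 -> 4 blocks
}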
@@ -22,16 +22,88 @@ static inline __host__ __device__ T powi(T a, T b) {
 return result;
 }

+// SFINAE doesn't work well with NVCC under Windows for math functions like pow and sqrt.
+// So we need to define the functions with the explicit function signatures.
+// As for pow, the following signatures are defined as the device function:
+// pow(float, int)
+// pow(double, int)
+// pow(float, float)
+// pow(double, double)
+// As for sqrt, the following signatures are defined as the device function:
+// sqrt(float)
+// sqrt(double)
+// As for inverse sqrt, we must define it explicitly in MSVC, otherwise the static cast will be
+// applied to the result of the inline function, and thus the result is incorrect.
+// e.g. if we use 1.0 / sqrt(2) for 2 ^ (-0.5) in MSVC, we get
+// int(2 ^ (-0.5)) = int(1.0 / sqrt(2)) = int(1.0 / int(1.414)) = int(1.0 / 1) = 1
+// However, the correct result is
+// int(2 ^ (-0.5)) = int(1.0 / 1.414) = 0
+#ifdef _MSC_VER
+// Functions for pow
+// pow for at::Half
+static inline __host__ __device__ at::Half pow_(at::Half base, at::Half exp) {
+return static_cast<at::Half>(std::pow(static_cast<float>(base), static_cast<float>(exp)));
+}
+// pow (floating, floating/int)
+template <typename Base_type, typename Exp_type>
+static inline __host__ __device__ typename std::enable_if<std::is_floating_point<Base_type>::value && (std::is_same<Base_type, Exp_type>::value || std::is_same<Exp_type, int>::value), Base_type>::type
+pow_(Base_type base, Exp_type exp) {
+return std::pow(base, exp);
+}
+// pow (integral, integral)
+template <typename Base_type, typename Exp_type>
+static inline __host__ __device__ typename std::enable_if<std::is_integral<Base_type>::value && std::is_same<Base_type, Exp_type>::value, Base_type>::type
+pow_(Base_type base, Exp_type exp) {
+return powi(base, exp);
+}
+// pow (Otherwise)
+template <typename Base_type, typename Exp_type>
+static inline __host__ __device__ typename std::enable_if<!std::is_same<Base_type, Exp_type>::value && !std::is_same<Exp_type, int>::value, Base_type>::type
+pow_(Base_type base, Exp_type exp) {
+return static_cast<Base_type>(std::pow(static_cast<double>(base), static_cast<double>(exp)));
+}
+// Functions for sqrt
+// sqrt (floating)
 template <typename T>
-static inline __host__ __device__ T sqrt(T x) {
+static inline __host__ __device__ typename std::enable_if<std::is_floating_point<T>::value, T>::type sqrt_(T x) {
 return std::sqrt(x);
 }
+// sqrt (integral)
+template <typename T>
+static inline __host__ __device__ typename std::enable_if<!std::is_floating_point<T>::value, T>::type sqrt_(T x) {
+return static_cast<T>(std::sqrt(static_cast<double>(x)));
+}
+// Function for inverse sqrt
+// invsqrt (floating)
+template <typename T>
+static inline __host__ __device__ typename std::enable_if<std::is_floating_point<T>::value, T>::type invsqrt_(T x) {
+return 1.0 / std::sqrt(x);
+}
+// invsqrt (integral)
+template <typename T>
+static inline __host__ __device__ typename std::enable_if<!std::is_floating_point<T>::value, T>::type invsqrt_(T x) {
+return static_cast<T>(1.0 / std::sqrt(static_cast<double>(x)));
+}
+#else
+template <typename Base_type, typename Exp_type>
+static inline __host__ __device__ Base_type pow_(Base_type base, Exp_type exp) {
+return std::pow(base, exp);
+}
+template <typename T>
+static inline __host__ __device__ T sqrt_(T x) {
+return ::sqrt(x);
+}
+template <typename T>
+static inline __host__ __device__ T invsqrt_(T x) {
+return 1.0 / ::sqrt(x);
+}
+#endif

 void pow_tensor_tensor_kernel(TensorIterator& iter) {
 if (isFloatingType(iter.dtype())) {
 AT_DISPATCH_FLOATING_TYPES_AND_HALF(iter.dtype(), "pow_cuda", [&]() {
 gpu_kernel(iter, []GPU_LAMBDA(scalar_t base, scalar_t exp) -> scalar_t {
-return std::pow(base, exp);
+return pow_(base, exp);
 });
 });
 } else {
@@ -49,7 +121,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter,
 const auto d_exp = static_cast<double>(exp);
 if (d_exp == 0.5) {
 gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
-return ::sqrt(base);
+return sqrt_(base);
 });
 } else if (d_exp == 2) {
 gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
@@ -61,7 +133,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter,
 });
 } else if (d_exp == -0.5) {
 gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
-return 1.0 / ::sqrt(base);
+return invsqrt_(base);
 });
 } else if (d_exp == -1) {
 gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
@@ -73,7 +145,7 @@ void pow_tensor_scalar_kernel_impl(TensorIterator& iter,
 });
 } else {
 gpu_kernel(iter, [=]GPU_LAMBDA(Base_type base) -> Base_type {
-return std::pow(base, exp);
+return pow_(base, exp);
 });
 }
 }
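The MSVC pitfall quoted in the comment above can be reproduced in isolation. A hypothetical host-only repro (not the CUDA kernel): if sqrt resolves to an integer-returning overload, the intermediate truncation flips the final result.

#include <cmath>
#include <cstdio>

// Stand-in for an integer sqrt overload that truncates its result.
static int bad_isqrt(int x) {
  return static_cast<int>(std::sqrt(static_cast<double>(x)));
}

int main() {
  int wrong = static_cast<int>(1.0 / bad_isqrt(2));    // 1.0 / 1 -> 1
  int right = static_cast<int>(1.0 / std::sqrt(2.0));  // 1.0 / 1.414... -> 0
  std::printf("%d %d\n", wrong, right);                // prints "1 0"
  return 0;
}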
@@ -52,7 +52,7 @@ Tensor& linspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step
 } else if (steps == 1) {
 r.fill_(start);
 } else {
-AT_DISPATCH_FLOATING_TYPES(r.scalar_type(), "linspace_cuda", [&]() {
+AT_DISPATCH_FLOATING_TYPES_AND_HALF(r.scalar_type(), "linspace_cuda", [&]() {
 scalar_t scalar_start = start.to<scalar_t>();
 scalar_t scalar_end = end.to<scalar_t>();
 scalar_t step = (scalar_end - scalar_start) / static_cast<scalar_t>(steps - 1);
@@ -84,7 +84,7 @@ Tensor& logspace_cuda_out(Tensor& result, Scalar start, Scalar end, int64_t step
 } else if (steps == 1) {
 r.fill_(std::pow(base, start.to<double>()));
 } else {
-AT_DISPATCH_FLOATING_TYPES(r.scalar_type(), "logspace_cuda", [&]() {
+AT_DISPATCH_FLOATING_TYPES_AND_HALF(r.scalar_type(), "logspace_cuda", [&]() {
 scalar_t scalar_base = static_cast<scalar_t>(base);
 scalar_t scalar_start = start.to<scalar_t>();
 scalar_t scalar_end = end.to<scalar_t>();
@@ -6068,6 +6068,9 @@
 CPU: replication_pad3d_backward_cpu
 CUDA: replication_pad3d_backward_cuda

+- func: _test_optional_float(Tensor self, *, float? scale=None) -> Tensor
+  variants: function
+
 - func: upsample_linear1d.out(Tensor self, int[1] output_size, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)
 python_module: nn
 dispatch:
@@ -77,6 +77,8 @@ def type_argument_translations(arg):
 # Enables float by translating to legacy double.
 elif t == 'float':
 t = 'double'
+elif t == 'float?':
+t = 'double?'
 # Enables str by translating to legacy std::string.
 elif t == 'str':
 t = 'std::string'
caffe2/operators/alias_with_name.cc (new file, 25 lines)
@@ -0,0 +1,25 @@
+#include "caffe2/operators/alias_with_name.h"
+
+namespace caffe2 {
+
+REGISTER_CPU_OPERATOR(AliasWithName, AliasWithNameOp<CPUContext>);
+
+OPERATOR_SCHEMA(AliasWithName)
+    .NumInputs(1)
+    .NumOutputs(1)
+    .AllowInplace({{0, 0}})
+    .IdenticalTypeAndShape()
+    .SetDoc(R"DOC(
+Similar with AliasOp, storing the alias name as operator argument.
+)DOC")
+    .Arg("name", "name of the aliasing")
+    .Arg("is_backward", "weather or not to alias forward or backward")
+    .Input(0, "input", "Input tensor whose storage will be shared.")
+    .Output(0, "output", "Tensor of same shape as input, sharing its storage.");
+
+} // namespace caffe2
+
+C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
+    AliasWithName,
+    "_caffe2::AliasWithName(Tensor input, str name, bool is_backward = False) -> (Tensor output)",
+    caffe2::AliasWithNameOp<caffe2::CPUContext>);
caffe2/operators/alias_with_name.cu (new file, 12 lines)
@@ -0,0 +1,12 @@
+#include "caffe2/core/context_gpu.h"
+#include "caffe2/operators/alias_with_name.h"
+
+namespace caffe2 {
+
+REGISTER_CUDA_OPERATOR(AliasWithName, AliasWithNameOp<CUDAContext>);
+
+} // namespace caffe2
+
+C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(
+    AliasWithName,
+    caffe2::AliasWithNameOp<caffe2::CUDAContext>);
caffe2/operators/alias_with_name.h (new file, 46 lines)
@@ -0,0 +1,46 @@
+#ifndef ALIAS_WITH_NAME_OP_H_
+#define ALIAS_WITH_NAME_OP_H_
+
+#include "caffe2/core/context.h"
+#include "caffe2/core/export_caffe2_op_to_c10.h"
+#include "caffe2/core/operator.h"
+
+C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(AliasWithName)
+
+namespace caffe2 {
+
+template <class Context>
+class AliasWithNameOp final : public Operator<Context> {
+ public:
+  USE_OPERATOR_CONTEXT_FUNCTIONS;
+  template <class... Args>
+  explicit AliasWithNameOp(Args&&... args)
+      : Operator<Context>(std::forward<Args>(args)...),
+        name_(this->template GetSingleArgument<std::string>(
+            "name",
+            "invalid_name")),
+        is_backward_(
+            this->template GetSingleArgument<bool>("is_backward", false)) {
+    CAFFE_ENFORCE(
+        OperatorBase::HasArgument("name"), "You have to specify argument name");
+  }
+
+  bool RunOnDevice() override {
+    auto& input = Input(0);
+    CAFFE_ENFORCE_GE(input.numel(), 0, "Tensor is not initialized");
+
+    // This doesn't work anymore as this is "newstyle" operator
+    // OutputTensorAlias(0, input);
+
+    OperatorBase::SetOutputTensor(0, input.Alias());
+    return true;
+  }
+
+ protected:
+  std::string name_;
+  bool is_backward_;
+};
+
+} // namespace caffe2
+
+#endif // ALIAS_WITH_NAME_OP_H_
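A toy illustration of the aliasing contract this operator documents (the output shares the input's storage via input.Alias()); a hypothetical shared-buffer sketch, not the caffe2 Tensor API:

#include <cassert>
#include <memory>
#include <vector>

struct TensorSketch {
  std::shared_ptr<std::vector<float>> storage;
  TensorSketch alias() const { return TensorSketch{storage}; }  // no copy
};

int main() {
  TensorSketch x{std::make_shared<std::vector<float>>(4, 0.0f)};
  TensorSketch y = x.alias();
  (*y.storage)[0] = 7.0f;
  assert((*x.storage)[0] == 7.0f);  // a write through the alias is visible
  return 0;
}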
caffe2/operators/batch_permutation_op.cc (new file, 169 lines)
@ -0,0 +1,169 @@
|
||||
#include "caffe2/operators/batch_permutation_op.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#ifdef CAFFE2_USE_MKLDNN
|
||||
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
|
||||
#include <caffe2/ideep/utils/ideep_operator.h>
|
||||
#endif
|
||||
|
||||
namespace caffe2 {
|
||||
|
||||
template <bool forwards>
|
||||
void batch_permutation_loop(
    const int N,
    const int K,
    const float* src,
    const int* indices,
    float* dst) {
  long numBytes = K * sizeof(float);
  if (forwards) {
#ifdef _OPENMP
#if (_OPENMP >= 201307)
#pragma omp parallel for simd
#else
#pragma omp parallel for
#endif
#endif
    for (int n = 0; n < N; n++) {
      int origIdx = n * K;
      int permuteIdx = indices[n] * K;
      std::memcpy(dst + origIdx, src + permuteIdx, numBytes);
    }
  } else {
    std::vector<int> backward_indices(N);
    for (size_t i = 0; i < N; ++i) {
      backward_indices[indices[i]] = i;
    }
    for (int n = 0; n < N; n++) {
      int permuteIdx = n * K;
      int origIdx = backward_indices[n] * K;
      std::memcpy(dst + permuteIdx, src + origIdx, numBytes);
    }
  }
}

template <>
bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& indices = Input(1);

  CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
  CAFFE_ENFORCE(
      X.dim32(0) == indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(X.dim32(0), 0);
  batch_permutation_loop<true>(
      X.dim32(0),
      X.numel() / X.dim32(0),
      X.data<float>(),
      indices.data<int>(),
      Y->mutable_data<float>());
  return true;
}

template <>
bool BatchPermutationGradientOp<float, CPUContext>::RunOnDevice() {
  auto& indices = Input(0);
  auto& dY = Input(1);

  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(dY.dim32(0), 0);
  batch_permutation_loop<false>(
      dY.dim32(0),
      dY.numel() / dY.dim32(0),
      dY.data<float>(),
      indices.data<int>(),
      dX->mutable_data<float>());
  return true;
}

#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
    BatchPermutation,
    IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
#endif

REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CPUContext>);

// Input: X, indices; Output: Y
OPERATOR_SCHEMA(BatchPermutation)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Batch permutation of an input tensor X given input indices. First dimension of
X equals batch size N. The indices store a valid permutation of N.
The output Y is a tensor of same shape as X, with data re-ordered according to
the indices within the batch size.

Example of batch permutation on a 2-D tensor with batch size 4:
  X = [
    [1, 5, 2, 3, 4, 6, 0],
    [4, 3, 3, 5, 2, 3, 1],
    [2, 2, 3, 6, 0, 0, 1],
    [0, 0, 1, 1, 2, 2, 3]
  ]
  indices = [2, 0, 1, 3]
  Y = [
    [2, 2, 3, 6, 0, 0, 1],
    [1, 5, 2, 3, 4, 6, 0],
    [4, 3, 3, 5, 2, 3, 1],
    [0, 0, 1, 1, 2, 2, 3]
  ]

Example of batch permutation on a 3-D tensor with batch size 4:
  X = [
    [[1, 5, 2], [3, 4, 6, 0]],
    [[4, 3, 3], [5, 2, 3, 1]],
    [[2, 2, 3], [6, 0, 0, 1]],
    [[0, 0, 1], [1, 2, 2, 3]]
  ]
  indices = [2, 0, 1, 3]
  Y = [
    [[2, 2, 3], [6, 0, 0, 1]],
    [[1, 5, 2], [3, 4, 6, 0]],
    [[4, 3, 3], [5, 2, 3, 1]],
    [[0, 0, 1], [1, 2, 2, 3]]
  ]
)DOC")
    .Input(0, "X", "Input tensor, where 1st dimension equals batch size")
    .Input(1, "indices", "Input indices of batch to permute")
    .Output(0, "Y", "Output permuted tensor");
// Input: indices, dY (aka "gradOutput"); Output: dX (aka "gradInput")
OPERATOR_SCHEMA(BatchPermutationGradient).NumInputs(2).NumOutputs(1);

class GetBatchPermutationGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "BatchPermutationGradient",
        "",
        vector<string>{I(1), GO(0)},
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient);

} // namespace caffe2

using BatchPermutationOpFloatCPU =
    caffe2::BatchPermutationOp<float, caffe2::CPUContext>;

C10_EXPORT_CAFFE2_OP_TO_C10_CPU(
    BatchPermutation,
    "_caffe2::BatchPermutation(Tensor X, Tensor indices) -> Tensor",
    BatchPermutationOpFloatCPU);
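For context (not part of the diff): the operator registered above can be exercised from the Caffe2 Python frontend. A minimal sketch, assuming a Caffe2 build with Python bindings; blob names are illustrative:

import numpy as np
from caffe2.python import core, workspace

# N = 4 rows of K = 3 elements; indices must hold a valid permutation of N.
X = np.arange(12, dtype=np.float32).reshape(4, 3)
indices = np.array([2, 0, 1, 3], dtype=np.int32)

workspace.FeedBlob("X", X)
workspace.FeedBlob("indices", indices)
workspace.RunOperatorOnce(
    core.CreateOperator("BatchPermutation", ["X", "indices"], ["Y"]))
Y = workspace.FetchBlob("Y")

# Forward semantics from the schema above: Y[n] = X[indices[n]].
assert np.array_equal(Y, X[indices])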
caffe2/operators/batch_permutation_op.cu (new file, 113 lines)
@ -0,0 +1,113 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/batch_permutation_op.h"

namespace caffe2 {

namespace {
template <bool forward>
__global__ void BatchPermutationKernel(
    int N,
    int K,
    const float* src,
    const int* indices,
    float* dst) {
  if (forward) {
    CUDA_1D_KERNEL_LOOP(index, N * K) {
      int k = index % K;
      int n = index / K;
      int idx = indices[n];
      CUDA_KERNEL_ASSERT(idx >= 0);
      CUDA_KERNEL_ASSERT(idx < N);
      dst[index] = src[idx * K + k];
    }
  } else {
    CUDA_1D_KERNEL_LOOP(index, N * K) {
      int k = index % K;
      int n = index / K;

      // NOTE: an alternative implementation if we want to align the index with
      // the output tensor (rather than the input tensor).
      // int idx = -1;
      // for (size_t i = 0; i < N; ++i) {
      //   if (indices[i] == n) {
      //     idx = i;
      //   }
      // }
      // CUDA_KERNEL_ASSERT(idx >= 0);
      // CUDA_KERNEL_ASSERT(idx < N);
      // dst[index] = src[idx * K + k];

      int idx = indices[n];
      CUDA_KERNEL_ASSERT(idx >= 0);
      CUDA_KERNEL_ASSERT(idx < N);
      dst[idx * K + k] = src[index];
    }
  }
}
} // namespace

template <>
bool BatchPermutationOp<float, CUDAContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& indices = Input(1);

  CAFFE_ENFORCE(indices.dim() == 1, "indices must be 1-d");
  CAFFE_ENFORCE(
      X.dim32(0) == indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(X.dim32(0), 0);
  BatchPermutationKernel<true>
      <<<CAFFE_GET_BLOCKS(X.numel()),
         CAFFE_CUDA_NUM_THREADS,
         0,
         context_.cuda_stream()>>>(
          X.dim32(0),
          X.numel() / X.dim32(0),
          X.data<float>(),
          indices.data<int>(),
          Y->mutable_data<float>());

  return true;
}

template <>
bool BatchPermutationGradientOp<float, CUDAContext>::RunOnDevice() {
  auto& indices = Input(0);
  auto& dY = Input(1);
  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

  CAFFE_ENFORCE_GT(dY.dim32(0), 0);
  BatchPermutationKernel<false>
      <<<CAFFE_GET_BLOCKS(dY.numel()),
         CAFFE_CUDA_NUM_THREADS,
         0,
         context_.cuda_stream()>>>(
          dY.dim32(0),
          dY.numel() / dY.dim32(0),
          dY.data<float>(),
          indices.data<int>(),
          dX->mutable_data<float>());

  return true;
}

REGISTER_CUDA_OPERATOR(
    BatchPermutation,
    BatchPermutationOp<float, CUDAContext>);
REGISTER_CUDA_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CUDAContext>);
} // namespace caffe2

using BatchPermutationOpFloatCUDA =
    caffe2::BatchPermutationOp<float, caffe2::CUDAContext>;

C10_EXPORT_CAFFE2_OP_TO_C10_CUDA(BatchPermutation, BatchPermutationOpFloatCUDA);
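The C10 export above also makes the operator callable from PyTorch as torch.ops._caffe2.BatchPermutation, per the schema string `_caffe2::BatchPermutation(Tensor X, Tensor indices) -> Tensor`. A minimal sketch (illustrative, not part of the diff):

import torch

X = torch.arange(12, dtype=torch.float32).reshape(4, 3)
indices = torch.tensor([2, 0, 1, 3], dtype=torch.int32)

# Same gather semantics as the CPU and CUDA kernels: Y[n] = X[indices[n]].
Y = torch.ops._caffe2.BatchPermutation(X, indices)
assert torch.equal(Y, X[indices.long()])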
caffe2/operators/batch_permutation_op.h (new file, 37 lines)
@ -0,0 +1,37 @@
#ifndef BATCHPERMUTATION_OP_H_
#define BATCHPERMUTATION_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/export_caffe2_op_to_c10.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(BatchPermutation)

namespace caffe2 {

template <typename T, class Context>
class BatchPermutationOp final : public Operator<Context> {
 public:
  template <class... Args>
  explicit BatchPermutationOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice();
};

template <typename T, class Context>
class BatchPermutationGradientOp final : public Operator<Context> {
 public:
  BatchPermutationGradientOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice();
};

} // namespace caffe2

#endif // BATCHPERMUTATION_OP_H_
caffe2/operators/batch_permutation_op_gpu_test.cc (new file, 269 lines)
@ -0,0 +1,269 @@
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/flags.h"
#include "caffe2/operators/batch_permutation_op.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
#include "gtest/gtest.h"

namespace caffe2 {
namespace {

// Add the vector as an input to a Workspace depending on the context of the
// workspace

template <typename T>
void AddInputCPU(
    const vector<int64_t>& shape,
    const vector<T>& values,
    const string& name,
    Workspace* ws) {
  Blob* blob = ws->CreateBlob(name);
  auto* tensor = BlobGetMutableTensor(blob, CPU);
  tensor->Resize(shape);
  EigenVectorMap<T> tensor_vec(tensor->mutable_data<T>(), tensor->numel());
  tensor_vec.array() = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>{
      values.data(), static_cast<int>(values.size())};
}

template <typename T>
void AddInputGPU(
    const vector<int64_t>& shape,
    const vector<T>& values,
    const string& name,
    Workspace* ws) {
  Tensor tmp(shape, CPU);
  EigenVectorMap<T> tmp_vec(tmp.mutable_data<T>(), tmp.numel());
  tmp_vec.array() = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>{
      values.data(), static_cast<int>(values.size())};

  Blob* blob = ws->CreateBlob(name);
  auto* tensor = BlobGetMutableTensor(blob, CUDA);
  tensor->CopyFrom(tmp);
}

// Overload 4 different signatures for AddInput because clang does not allow
// template <typename T>
// void AddInput<CPUContext>(...) {...}

template <typename T, class Context>
void AddInput(
    const vector<int64_t>& shape,
    const vector<T>& values,
    const string& name,
    Workspace* ws);

template <>
void AddInput<int, CPUContext>(
    const vector<int64_t>& shape,
    const vector<int>& values,
    const string& name,
    Workspace* ws) {
  AddInputCPU<int>(shape, values, name, ws);
}

template <>
void AddInput<float, CPUContext>(
    const vector<int64_t>& shape,
    const vector<float>& values,
    const string& name,
    Workspace* ws) {
  AddInputCPU<float>(shape, values, name, ws);
}

template <>
void AddInput<int, CUDAContext>(
    const vector<int64_t>& shape,
    const vector<int>& values,
    const string& name,
    Workspace* ws) {
  AddInputGPU<int>(shape, values, name, ws);
}

template <>
void AddInput<float, CUDAContext>(
    const vector<int64_t>& shape,
    const vector<float>& values,
    const string& name,
    Workspace* ws) {
  AddInputGPU<float>(shape, values, name, ws);
}

template <class Context>
DeviceTypeProto GetDeviceType() {
  return PROTO_CPU;
}
template <>
DeviceTypeProto GetDeviceType<CUDAContext>() {
  return PROTO_CUDA;
}

// Create a BatchPermutationOp with the given inputs (actual values are
// generated sequentially) and run it
template <class Context>
void CreateAndRun(
    TensorCPU* outResult,
    int N,
    vector<int64_t>& shape,
    vector<float>& features,
    vector<int> indices) {
  Workspace ws;

  AddInput<float, Context>(shape, features, "X", &ws);
  AddInput<int, Context>(vector<int64_t>{N}, indices, "indices", &ws);

  OperatorDef def;
  def.set_name("test");
  def.set_type("BatchPermutation");
  def.add_input("X");
  def.add_input("indices");
  def.add_output("Y");
  def.mutable_device_option()->set_device_type(GetDeviceType<Context>());
  unique_ptr<OperatorBase> op = CreateOperator(def, &ws);

  EXPECT_NE(nullptr, op.get());
  EXPECT_TRUE(op->Run());

  Blob* Y_blob = ws.GetBlob("Y");
  EXPECT_NE(nullptr, Y_blob);

  auto& Y = Y_blob->Get<Tensor>();
  outResult->CopyFrom(Y);
}

// Create a BatchPermutationGradientOp with the given inputs (actual values
// are generated sequentially) and run it
template <class Context>
void CreateAndRunGradient(
    TensorCPU* outResult,
    int N,
    vector<int64_t>& shape,
    vector<float>& features,
    vector<int> indices) {
  Workspace ws;

  AddInput<float, Context>(shape, features, "dY", &ws);
  AddInput<int, Context>(vector<int64_t>{N}, indices, "indices", &ws);

  OperatorDef def;
  def.set_name("test");
  def.set_type("BatchPermutationGradient");
  def.add_input("indices");
  def.add_input("dY");
  def.add_output("dX");
  def.mutable_device_option()->set_device_type(GetDeviceType<Context>());
  unique_ptr<OperatorBase> op = CreateOperator(def, &ws);

  EXPECT_NE(nullptr, op.get());
  EXPECT_TRUE(op->Run());

  Blob* Y_blob = ws.GetBlob("dX");
  EXPECT_NE(nullptr, Y_blob);

  auto& Y = Y_blob->Get<Tensor>();
  outResult->CopyFrom(Y);
}

// Check that the CPU and GPU implementations provide the exact same results
void CheckCPUGPUEqual(vector<int64_t> shape, vector<int> indices) {
  // Prepare input data
  EXPECT_GT(shape.size(), 1);
  int N = shape[0];
  int input_size = 1;
  for (auto k : shape) {
    input_size *= k;
  }
  int K = input_size / N;
  vector<float> features(input_size);
  std::iota(features.begin(), features.end(), 0);

  // CPU outputs
  Tensor y_cpu{CPU};
  Tensor y_cpu_grad{CPU};

  // CPU BatchPermutation
  CreateAndRun<CPUContext>(&y_cpu, N, shape, features, indices);

  // CPU BatchPermutationGradient
  CreateAndRunGradient<CPUContext>(&y_cpu_grad, N, shape, features, indices);

  // Check CPU output values: Y[i] = X[indices[i]] and dX[indices[i]] = dY[i]
  for (auto i = 0; i < indices.size(); ++i) {
    for (auto k = 0; k < K; ++k) {
      EXPECT_NEAR(
          y_cpu.data<float>()[i * K + k], features[indices[i] * K + k], 1e-4);
      EXPECT_NEAR(
          y_cpu_grad.data<float>()[indices[i] * K + k],
          features[i * K + k],
          1e-4);
    }
  }

  if (!caffe2::HasCudaGPU()) {
    VLOG(2) << "No CudaGPU found. Skip GPU test." << std::endl;
    return;
  }

  // GPU outputs
  Tensor y_gpu{CPU};
  Tensor y_gpu_grad{CPU};

  // GPU BatchPermutation
  CreateAndRun<CUDAContext>(&y_gpu, N, shape, features, indices);

  // Compare CPU and GPU BatchPermutation outputs
  EXPECT_EQ(y_cpu.sizes(), y_gpu.sizes());
  ConstEigenVectorMap<float> y_cpu_vec(y_cpu.data<float>(), y_cpu.numel());
  ConstEigenVectorMap<float> y_gpu_vec(y_gpu.data<float>(), y_gpu.numel());
  EXPECT_TRUE(y_cpu_vec.isApprox(y_gpu_vec));

  // GPU BatchPermutationGradient
  CreateAndRunGradient<CUDAContext>(&y_gpu_grad, N, shape, features, indices);

  // Check GPU outputs
  for (auto i = 0; i < indices.size(); ++i) {
    for (auto k = 0; k < K; ++k) {
      EXPECT_NEAR(
          y_gpu.data<float>()[i * K + k], features[indices[i] * K + k], 1e-4);
      EXPECT_NEAR(
          y_gpu_grad.data<float>()[indices[i] * K + k],
          features[i * K + k],
          1e-4);
    }
  }

  // Compare CPU and GPU BatchPermutationGradient outputs
  EXPECT_EQ(y_cpu_grad.sizes(), y_gpu_grad.sizes());
  ConstEigenVectorMap<float> y_cpu_vec_grad(
      y_cpu_grad.data<float>(), y_cpu_grad.numel());
  ConstEigenVectorMap<float> y_gpu_vec_grad(
      y_gpu_grad.data<float>(), y_gpu_grad.numel());
  EXPECT_TRUE(y_cpu_vec_grad.isApprox(y_gpu_vec_grad));
}

} // namespace

TEST(BatchPermutationTest, CHECKCPUGPUEqualGenericDimension) {
  auto t0 = std::chrono::high_resolution_clock::now();
  int batch_size = 8;
  int max_dimension = 6;
  vector<int64_t> shape = vector<int64_t>{batch_size};

  auto seed = std::chrono::system_clock::now().time_since_epoch().count();
  std::default_random_engine generator(seed);

  for (int i = 2; i < max_dimension; ++i) {
    std::uniform_int_distribution<> dis(1, i);
    shape.push_back(dis(generator));
    CheckCPUGPUEqual(shape, vector<int>{0, 1, 2, 3, 4, 5, 6, 7});
    CheckCPUGPUEqual(shape, vector<int>{7, 6, 5, 4, 3, 2, 1, 0});
    CheckCPUGPUEqual(shape, vector<int>{1, 3, 5, 7, 0, 2, 4, 6});
    CheckCPUGPUEqual(shape, vector<int>{4, 5, 6, 7, 0, 1, 2, 3});
    CheckCPUGPUEqual(shape, vector<int>{3, 1, 5, 7, 6, 2, 4, 0});
  }
  auto t1 = std::chrono::high_resolution_clock::now();
  double elapsed =
      std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
  VLOG(2) << "Time elapsed: " << elapsed << " ms" << std::endl;
  return;
}
} // namespace caffe2
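The identity the gradient checks above rely on is plain inverse permutation: the backward pass scatters dY[n] into dX[indices[n]]. A NumPy sketch of that identity (illustrative only, not part of the diff):

import numpy as np

indices = np.array([2, 0, 1, 3])
dY = np.random.rand(4, 3).astype(np.float32)

dX = np.empty_like(dY)
dX[indices] = dY  # scatter, as in BatchPermutationKernel<false>
# Equivalent gather form via the inverse permutation:
assert np.allclose(dX, dY[np.argsort(indices)])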
@ -136,7 +136,7 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
    bias_data = Input(BIAS).template data<T>();
  }

  auto f = [&](Tensor* col_buffer) {
  auto f = [this, &filter_offset, &bias_data, &X, &buffer_shape, &N, &Xdata, &offset_data, &M, &filter, &output_image_size, &kernel_dim, &Ydata, &input_offset, &offset_offset, &output_offset] (Tensor* col_buffer) {
    col_buffer->Resize(buffer_shape);
    T* col_buffer_data = col_buffer->template mutable_data<T>();
    // Im2col, followed by gemm.
caffe2/python/operator_test/alias_with_name_test.py (new file, 31 lines)
@ -0,0 +1,31 @@
#!/usr/bin/env python3

import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
from caffe2.python import core, utils
from hypothesis import given


class TestAliasWithNameOp(hu.HypothesisTestCase):
    @given(
        shape=st.lists(st.integers(0, 5), min_size=1, max_size=3),
        dtype=st.sampled_from([np.float32, np.int64]),
        **hu.gcs
    )
    def test_alias_with_name_op(self, shape, dtype, dc, gc):
        test_input = (100 * np.random.random(shape)).astype(dtype)
        test_inputs = [test_input]

        alias_op = core.CreateOperator(
            "AliasWithName",
            ["input"],
            ["output"],
            device_option=gc,
        )
        alias_op.arg.add().CopyFrom(utils.MakeArgument("name", "whatever_name"))

        def reference_func(x):
            return (x,)

        self.assertReferenceChecks(gc, alias_op, test_inputs, reference_func)
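The same operator can also be driven directly through the Caffe2 workspace, mirroring the hypothesis test above. A minimal sketch (blob names illustrative, not part of the diff):

import numpy as np
from caffe2.python import core, utils, workspace

workspace.FeedBlob("input", np.array([3.0, 42.0], dtype=np.float32))
op = core.CreateOperator("AliasWithName", ["input"], ["output"])
op.arg.add().CopyFrom(utils.MakeArgument("name", "whatever_name"))
workspace.RunOperatorOnce(op)
# AliasWithName acts as an identity op; the "name" argument is an annotation.
print(workspace.FetchBlob("output"))  # -> [ 3. 42.]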
@ -139,6 +139,7 @@ class TestMomentumSGD(serial.SerializedTestCase):
            [grad, m, lr, w, indices],
            sparse)

    @unittest.skip("Test is flaky, see https://github.com/pytorch/pytorch/issues/31368")
    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
    @given(n=st.integers(4, 8), nesterov=st.booleans(), **hu.gcs)
    def test_fp16momentum_sgd(self, n, nesterov, gc, dc):
@ -710,6 +710,15 @@ class TorchIntegration(hu.HypothesisTestCase):

        torch.testing.assert_allclose(torch.tensor(expected_output), actual_output)

    def test_alias_with_name_is_in_place(self):
        device = "cuda" if workspace.has_cuda_support else "cpu"
        x = torch.Tensor([3, 42]).to(device)
        y = torch.ops._caffe2.AliasWithName(x, "new_name")
        x[1] = 6
        torch.testing.assert_allclose(x, torch.Tensor([3, 6]).to(device))
        # y should also change because y is an alias of x
        torch.testing.assert_allclose(y, torch.Tensor([3, 6]).to(device))


if __name__ == '__main__':
    unittest.main()
@ -36,6 +36,7 @@ torch.optim

- Vincent Quenneville-Belair (`vincentqb <https://github.com/vincentqb>`__)
- Soumith Chintala (`soumith <https://github.com/soumith>`__)
- Wanchao Liang (`wanchaol <https://github.com/wanchaol>`__)

Autograd Engine
~~~~~~~~~~~~~~~
@ -95,6 +96,20 @@ MKLDNN
- Junjie Bai (`bddppq <https://github.com/bddppq>`__)
- Yinghai Lu (`yinghai <https://github.com/yinghai>`__)

Mobile
~~~~~~

- David Reiss (`dreiss <https://github.com/dreiss>`__)
- Jiakai Liu (`ljk53 <https://github.com/ljk53>`__)

Quantization
~~~~~~~~~~~~

- Raghuraman Krishnamoorthi (`raghuramank100 <https://github.com/raghuramank100>`__)
- Jerry Zhang (`jerryzh168 <https://github.com/jerryzh168>`__)
- Lingyi Liu (`lly-zero-one <https://github.com/lly-zero-one>`__)
- James Reed (`jamesr66a <https://github.com/jamesr66a>`__)

XLA
~~~

@ -138,6 +153,9 @@ ONNX <-> PyTorch
~~~~~~~~~~~~~~~~

- Lu Fang (`houseroad <https://github.com/houseroad>`__)
- Lara Haidar (`lara-hdr <https://github.com/lara-hdr>`__)
- Spandan Tiwari (`spandantiwari <https://github.com/spandantiwari>`__)
- Bowen Bao (`BowenBao <https://github.com/BowenBao>`__)

Windows
~~~~~~~

@ -46,7 +46,6 @@ extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.napoleon',

@ -17,13 +17,13 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.

   notes/*
   PyTorch on XLA Devices <http://pytorch.org/xla/>


.. toctree::
   :glob:
   :maxdepth: 1
   :caption: Community
   :maxdepth: 1
   :caption: Language Bindings

   community/*
   C++ API <https://pytorch.org/cppdocs/>
   Javadoc <https://pytorch.org/javadoc/>

.. toctree::
   :maxdepth: 1
@ -78,13 +78,13 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
   :caption: torchtext Reference

   torchtext <https://pytorch.org/text>


.. toctree::
   :maxdepth: 1
   :caption: Other Languages
   :glob:
   :maxdepth: 1
   :caption: Community

   C++ API <https://pytorch.org/cppdocs/>
   packages
   community/*

Indices and tables
==================

@ -314,13 +314,14 @@ The :class:`~torch.distributed.optim.DistributedOptimizer` operates as follows:
Simple end to end example
^^^^^^^^^^^^^^^^^^^^^^^^^

Putting it all together, a very simple end to end example using distributed
autograd and distributed optimizer is as follows:
Putting it all together, the following is a simple end to end example using
distributed autograd and the distributed optimizer. If the code is placed into a
file called "dist_autograd_simple.py", it can be run with the command
:code:`MASTER_ADDR="localhost" MASTER_PORT=29500 python dist_autograd_simple.py`:

.. code::

    import multiprocessing as mp
    from tempfile import NamedTemporaryFile
    import torch
    import torch.distributed.autograd as dist_autograd
    from torch.distributed import rpc
@ -330,52 +331,52 @@ autograd and distributed optimizer is as follows:
    def random_tensor():
        return torch.rand((3, 3), requires_grad=True)

    def _run_process(self_rank, dst_rank, file_name):
        self_name = "worker{}".format(self_rank)
    def _run_process(rank, dst_rank, world_size):
        name = "worker{}".format(rank)
        dst_name = "worker{}".format(dst_rank)

        # Initialize RPC.
        rpc.init_rpc(
            self_name=self_name,
            self_rank=self_rank,
            worker_name_to_id={"worker0": 0, "worker1": 1},
            init_method="file://{}".format(file_name),
            name=name,
            rank=rank,
            world_size=world_size
        )

        # Use a distributed autograd context.
        with dist_autograd.context() as context_id:
            # Forward pass (create references on remote nodes).
            rref1 = rpc.remote(dst_name, random_tensor)
            rref2 = rpc.remote(dst_name, random_tensor)
            loss = rref1.to_here() + rref2.to_here()
            # Forward pass (create references on remote nodes).
            rref1 = rpc.remote(dst_name, random_tensor)
            rref2 = rpc.remote(dst_name, random_tensor)
            loss = rref1.to_here() + rref2.to_here()

            # Backward pass (run distributed autograd).
            dist_autograd.backward([loss.sum()])
            # Backward pass (run distributed autograd).
            dist_autograd.backward([loss.sum()])

            # Build DistributedOptimizer.
            dist_optim = DistributedOptimizer(
                optim.SGD,
                [rref1, rref2],
                lr=0.05,
            )
            # Build DistributedOptimizer.
            dist_optim = DistributedOptimizer(
                optim.SGD,
                [rref1, rref2],
                lr=0.05,
            )

            # Run the distributed optimizer step.
            dist_optim.step()
            # Run the distributed optimizer step.
            dist_optim.step()

    def run_process(self_rank, dst_rank, file_name):
        _run_process(self_rank, dst_rank, file_name)
        rpc.wait_all_workers()
    def run_process(rank, dst_rank, world_size):
        _run_process(rank, dst_rank, world_size)
        rpc.shutdown()

    file_name = NamedTemporaryFile().name
    processes = []

    # Run two workers.
    for i in range(2):
        p = mp.Process(target=run_process, args=(i, (i + 1) % 2, file_name))
    # Run world_size workers.
    world_size = 2
    for i in range(world_size):
        p = mp.Process(target=run_process, args=(i, (i + 1) % 2, world_size))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()


.. _RFC: https://github.com/pytorch/pytorch/issues/23110

@ -1,67 +0,0 @@
DType
=====

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public enum DType

   Codes representing tensor data types.

Enum Constants
--------------
FLOAT32
^^^^^^^

.. java:field:: public static final DType FLOAT32
   :outertype: DType

   Code for dtype torch.float32. \ :java:ref:`Tensor.dtype()`\

FLOAT64
^^^^^^^

.. java:field:: public static final DType FLOAT64
   :outertype: DType

   Code for dtype torch.float64. \ :java:ref:`Tensor.dtype()`\

INT32
^^^^^

.. java:field:: public static final DType INT32
   :outertype: DType

   Code for dtype torch.int32. \ :java:ref:`Tensor.dtype()`\

INT64
^^^^^

.. java:field:: public static final DType INT64
   :outertype: DType

   Code for dtype torch.int64. \ :java:ref:`Tensor.dtype()`\

INT8
^^^^

.. java:field:: public static final DType INT8
   :outertype: DType

   Code for dtype torch.int8. \ :java:ref:`Tensor.dtype()`\

UINT8
^^^^^

.. java:field:: public static final DType UINT8
   :outertype: DType

   Code for dtype torch.uint8. \ :java:ref:`Tensor.dtype()`\

Fields
------
jniCode
^^^^^^^

.. java:field:: final int jniCode
   :outertype: DType
@ -1,297 +0,0 @@
.. java:import:: java.util Locale

.. java:import:: java.util Map

IValue
======

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public class IValue

   Java representation of a TorchScript value, which is implemented as a tagged union that can be one of the supported types: https://pytorch.org/docs/stable/jit.html#types .

   Calling \ ``toX``\  methods for inappropriate types will throw \ :java:ref:`IllegalStateException`\ .

   \ ``IValue``\  objects are constructed with \ ``IValue.from(value)``\ , \ ``IValue.tupleFrom(value1, value2, ...)``\ , \ ``IValue.listFrom(value1, value2, ...)``\ , or one of the \ ``dict``\  methods, depending on the key type.

   Data is retrieved from \ ``IValue``\  objects with the \ ``toX()``\  methods. Note that \ ``str``\ -type IValues must be extracted with \ :java:ref:`toStr()`\ , rather than \ :java:ref:`toString()`\ .

   \ ``IValue``\  objects may retain references to objects passed into their constructors, and may return references to their internal state from \ ``toX()``\ .

Methods
-------
dictLongKeyFrom
^^^^^^^^^^^^^^^

.. java:method:: public static IValue dictLongKeyFrom(Map<Long, IValue> map)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Dict[int, V]``\ .

dictStringKeyFrom
^^^^^^^^^^^^^^^^^

.. java:method:: public static IValue dictStringKeyFrom(Map<String, IValue> map)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Dict[str, V]``\ .

from
^^^^

.. java:method:: public static IValue from(Tensor tensor)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Tensor``\ .

from
^^^^

.. java:method:: public static IValue from(boolean value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``bool``\ .

from
^^^^

.. java:method:: public static IValue from(long value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``int``\ .

from
^^^^

.. java:method:: public static IValue from(double value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``float``\ .

from
^^^^

.. java:method:: public static IValue from(String value)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``str``\ .

isBool
^^^^^^

.. java:method:: public boolean isBool()
   :outertype: IValue

isBoolList
^^^^^^^^^^

.. java:method:: public boolean isBoolList()
   :outertype: IValue

isDictLongKey
^^^^^^^^^^^^^

.. java:method:: public boolean isDictLongKey()
   :outertype: IValue

isDictStringKey
^^^^^^^^^^^^^^^

.. java:method:: public boolean isDictStringKey()
   :outertype: IValue

isDouble
^^^^^^^^

.. java:method:: public boolean isDouble()
   :outertype: IValue

isDoubleList
^^^^^^^^^^^^

.. java:method:: public boolean isDoubleList()
   :outertype: IValue

isList
^^^^^^

.. java:method:: public boolean isList()
   :outertype: IValue

isLong
^^^^^^

.. java:method:: public boolean isLong()
   :outertype: IValue

isLongList
^^^^^^^^^^

.. java:method:: public boolean isLongList()
   :outertype: IValue

isNull
^^^^^^

.. java:method:: public boolean isNull()
   :outertype: IValue

isString
^^^^^^^^

.. java:method:: public boolean isString()
   :outertype: IValue

isTensor
^^^^^^^^

.. java:method:: public boolean isTensor()
   :outertype: IValue

isTensorList
^^^^^^^^^^^^

.. java:method:: public boolean isTensorList()
   :outertype: IValue

isTuple
^^^^^^^

.. java:method:: public boolean isTuple()
   :outertype: IValue

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(boolean... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[bool]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(long... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[int]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(double... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[float]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(Tensor... list)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[Tensor]``\ .

listFrom
^^^^^^^^

.. java:method:: public static IValue listFrom(IValue... array)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``List[T]``\ . All elements must have the same type.

optionalNull
^^^^^^^^^^^^

.. java:method:: public static IValue optionalNull()
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Optional``\  that contains no value.

toBool
^^^^^^

.. java:method:: public boolean toBool()
   :outertype: IValue

toBoolList
^^^^^^^^^^

.. java:method:: public boolean[] toBoolList()
   :outertype: IValue

toDictLongKey
^^^^^^^^^^^^^

.. java:method:: public Map<Long, IValue> toDictLongKey()
   :outertype: IValue

toDictStringKey
^^^^^^^^^^^^^^^

.. java:method:: public Map<String, IValue> toDictStringKey()
   :outertype: IValue

toDouble
^^^^^^^^

.. java:method:: public double toDouble()
   :outertype: IValue

toDoubleList
^^^^^^^^^^^^

.. java:method:: public double[] toDoubleList()
   :outertype: IValue

toList
^^^^^^

.. java:method:: public IValue[] toList()
   :outertype: IValue

toLong
^^^^^^

.. java:method:: public long toLong()
   :outertype: IValue

toLongList
^^^^^^^^^^

.. java:method:: public long[] toLongList()
   :outertype: IValue

toStr
^^^^^

.. java:method:: public String toStr()
   :outertype: IValue

toTensor
^^^^^^^^

.. java:method:: public Tensor toTensor()
   :outertype: IValue

toTensorList
^^^^^^^^^^^^

.. java:method:: public Tensor[] toTensorList()
   :outertype: IValue

toTuple
^^^^^^^

.. java:method:: public IValue[] toTuple()
   :outertype: IValue

tupleFrom
^^^^^^^^^

.. java:method:: public static IValue tupleFrom(IValue... array)
   :outertype: IValue

   Creates a new \ ``IValue``\  of type \ ``Tuple[T0, T1, ...]``\ .
@ -1,55 +0,0 @@
.. java:import:: com.facebook.jni HybridData

Module
======

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public class Module

   Java wrapper for torch::jit::script::Module.

Methods
-------
destroy
^^^^^^^

.. java:method:: public void destroy()
   :outertype: Module

   Explicitly destroys the native torch::jit::script::Module. Calling this method is not required, as the native object will be destroyed when this object is garbage-collected. However, the timing of garbage collection is not guaranteed, so proactively calling \ ``destroy``\  can free memory more quickly. See \ :java:ref:`com.facebook.jni.HybridData.resetNative`\ .

forward
^^^^^^^

.. java:method:: public IValue forward(IValue... inputs)
   :outertype: Module

   Runs the 'forward' method of this module with the specified arguments.

   :param inputs: arguments for the TorchScript module's 'forward' method.
   :return: return value from the 'forward' method.

load
^^^^

.. java:method:: public static Module load(String modelPath)
   :outertype: Module

   Loads a serialized TorchScript module from the specified path on the disk.

   :param modelPath: path to file that contains the serialized TorchScript module.
   :return: new \ :java:ref:`org.pytorch.Module`\  object which owns torch::jit::script::Module.

runMethod
^^^^^^^^^

.. java:method:: public IValue runMethod(String methodName, IValue... inputs)
   :outertype: Module

   Runs the specified method of this module with the specified arguments.

   :param methodName: name of the TorchScript method to run.
   :param inputs: arguments that will be passed to TorchScript method.
   :return: return value from the method.
@ -1,60 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_float32
=====================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_float32 extends Tensor
   :outertype: Tensor

Constructors
------------
Tensor_float32
^^^^^^^^^^^^^^

.. java:constructor:: Tensor_float32(FloatBuffer data, long[] shape)
   :outertype: Tensor.Tensor_float32

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_float32

getDataAsFloatArray
^^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public float[] getDataAsFloatArray()
   :outertype: Tensor.Tensor_float32

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_float32

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_float32
@ -1,52 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_float64
=====================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_float64 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_float64

getDataAsDoubleArray
^^^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public double[] getDataAsDoubleArray()
   :outertype: Tensor.Tensor_float64

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_float64

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_float64
@ -1,52 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_int32
===================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_int32 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_int32

getDataAsIntArray
^^^^^^^^^^^^^^^^^

.. java:method:: @Override public int[] getDataAsIntArray()
   :outertype: Tensor.Tensor_int32

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_int32

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_int32
@ -1,52 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_int64
===================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_int64 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_int64

getDataAsLongArray
^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public long[] getDataAsLongArray()
   :outertype: Tensor.Tensor_int64

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_int64

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_int64
@ -1,52 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_int8
==================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_int8 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_int8

getDataAsByteArray
^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public byte[] getDataAsByteArray()
   :outertype: Tensor.Tensor_int8

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_int8

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_int8
@ -1,52 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor.Tensor_uint8
===================

.. java:package:: org.pytorch
   :noindex:

.. java:type:: static class Tensor_uint8 extends Tensor
   :outertype: Tensor

Methods
-------
dtype
^^^^^

.. java:method:: @Override public DType dtype()
   :outertype: Tensor.Tensor_uint8

getDataAsUnsignedByteArray
^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: @Override public byte[] getDataAsUnsignedByteArray()
   :outertype: Tensor.Tensor_uint8

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: @Override Buffer getRawDataBuffer()
   :outertype: Tensor.Tensor_uint8

toString
^^^^^^^^

.. java:method:: @Override public String toString()
   :outertype: Tensor.Tensor_uint8
@ -1,315 +0,0 @@
.. java:import:: java.nio Buffer

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio ByteOrder

.. java:import:: java.nio DoubleBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.nio IntBuffer

.. java:import:: java.nio LongBuffer

.. java:import:: java.util Arrays

.. java:import:: java.util Locale

Tensor
======

.. java:package:: org.pytorch
   :noindex:

.. java:type:: public abstract class Tensor

   Representation of a Tensor. Behavior is similar to PyTorch's tensor objects.

   Most tensors will be constructed as \ ``Tensor.fromBlob(data, shape)``\ , where \ ``data``\  can be an array or a direct \ :java:ref:`Buffer`\  (of the proper subclass). Helper methods are provided to allocate buffers properly.

   To access Tensor data, see \ :java:ref:`dtype()`\ , \ :java:ref:`shape()`\ , and various \ ``getDataAs*``\  methods.

   When constructing \ ``Tensor``\  objects with \ ``data``\  as an array, it is not specified whether this data is copied or retained as a reference, so it is recommended not to modify it after constructing. \ ``data``\  passed as a \ :java:ref:`Buffer`\  is not copied, so it can be modified between \ :java:ref:`Module`\  calls to avoid reallocation. Data retrieved from \ ``Tensor``\  objects may be copied or may be a reference to the \ ``Tensor``\ 's internal data buffer. \ ``shape``\  is always copied.

Methods
-------
allocateByteBuffer
^^^^^^^^^^^^^^^^^^

.. java:method:: public static ByteBuffer allocateByteBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.ByteBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(ByteBuffer,long[])`\ , \ :java:ref:`Tensor.fromBlobUnsigned(ByteBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of result buffer.

allocateDoubleBuffer
^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static DoubleBuffer allocateDoubleBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.DoubleBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(DoubleBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of result buffer.

allocateFloatBuffer
^^^^^^^^^^^^^^^^^^^

.. java:method:: public static FloatBuffer allocateFloatBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.FloatBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(FloatBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of result buffer.

allocateIntBuffer
^^^^^^^^^^^^^^^^^

.. java:method:: public static IntBuffer allocateIntBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.IntBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(IntBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of result buffer.

allocateLongBuffer
^^^^^^^^^^^^^^^^^^

.. java:method:: public static LongBuffer allocateLongBuffer(int numElements)
   :outertype: Tensor

   Allocates a new direct \ :java:ref:`java.nio.LongBuffer`\  with native byte order with specified capacity that can be used in \ :java:ref:`Tensor.fromBlob(LongBuffer,long[])`\ .

   :param numElements: capacity (number of elements) of result buffer.

dtype
^^^^^

.. java:method:: public abstract DType dtype()
   :outertype: Tensor

   :return: data type of this tensor.

dtypeJniCode
^^^^^^^^^^^^

.. java:method:: int dtypeJniCode()
   :outertype: Tensor

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(byte[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int8 with specified shape and data as array of bytes.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(int[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int32 with specified shape and data as array of ints.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(float[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float32 with specified shape and data as array of floats.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(long[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int64 with specified shape and data as array of longs.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(long[] shape, double[] data)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float64 with specified shape and data as array of doubles.

   :param shape: Tensor shape
   :param data: Tensor elements

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(ByteBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int8 with specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(IntBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int32 with specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(FloatBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float32 with specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(LongBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.int64 with specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlob
^^^^^^^^

.. java:method:: public static Tensor fromBlob(DoubleBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.float64 with specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

fromBlobUnsigned
^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor fromBlobUnsigned(byte[] data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.uint8 with specified shape and data as array of bytes.

   :param data: Tensor elements
   :param shape: Tensor shape

fromBlobUnsigned
^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor fromBlobUnsigned(ByteBuffer data, long[] shape)
   :outertype: Tensor

   Creates a new Tensor instance with dtype torch.uint8 with specified shape and data.

   :param data: Direct buffer with native byte order that contains \ ``Tensor.numel(shape)``\  elements. The buffer is used directly without copying, and changes to its content will change the tensor.
   :param shape: Tensor shape

getDataAsByteArray
^^^^^^^^^^^^^^^^^^

.. java:method:: public byte[] getDataAsByteArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-int8 tensor.
   :return: a Java byte array that contains the tensor data. This may be a copy or reference.

getDataAsDoubleArray
^^^^^^^^^^^^^^^^^^^^

.. java:method:: public double[] getDataAsDoubleArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-float64 tensor.
   :return: a Java double array that contains the tensor data. This may be a copy or reference.

getDataAsFloatArray
^^^^^^^^^^^^^^^^^^^

.. java:method:: public float[] getDataAsFloatArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-float32 tensor.
   :return: a Java float array that contains the tensor data. This may be a copy or reference.

getDataAsIntArray
^^^^^^^^^^^^^^^^^

.. java:method:: public int[] getDataAsIntArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-int32 tensor.
   :return: a Java int array that contains the tensor data. This may be a copy or reference.

getDataAsLongArray
^^^^^^^^^^^^^^^^^^

.. java:method:: public long[] getDataAsLongArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-int64 tensor.
   :return: a Java long array that contains the tensor data. This may be a copy or reference.

getDataAsUnsignedByteArray
^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public byte[] getDataAsUnsignedByteArray()
   :outertype: Tensor

   :throws IllegalStateException: if it is called for a non-uint8 tensor.
   :return: a Java byte array that contains the tensor data. This may be a copy or reference.

getRawDataBuffer
^^^^^^^^^^^^^^^^

.. java:method:: Buffer getRawDataBuffer()
   :outertype: Tensor

numel
^^^^^

.. java:method:: public long numel()
   :outertype: Tensor

   Returns the number of elements in this tensor.

numel
^^^^^

.. java:method:: public static long numel(long[] shape)
   :outertype: Tensor

   Calculates the number of elements in a tensor with the specified shape.

shape
^^^^^

.. java:method:: public long[] shape()
   :outertype: Tensor

   Returns the shape of this tensor. (The array is a fresh copy.)
@@ -1,114 +0,0 @@
.. java:import:: android.graphics Bitmap

.. java:import:: android.graphics ImageFormat

.. java:import:: android.media Image

.. java:import:: org.pytorch Tensor

.. java:import:: java.nio ByteBuffer

.. java:import:: java.nio FloatBuffer

.. java:import:: java.util Locale

TensorImageUtils
================

.. java:package:: org.pytorch.torchvision
   :noindex:

.. java:type:: public final class TensorImageUtils

   Contains utility functions for \ :java:ref:`org.pytorch.Tensor`\  creation from \ :java:ref:`android.graphics.Bitmap`\  or \ :java:ref:`android.media.Image`\  source.

Fields
------
TORCHVISION_NORM_MEAN_RGB
^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:field:: public static float[] TORCHVISION_NORM_MEAN_RGB
   :outertype: TensorImageUtils

TORCHVISION_NORM_STD_RGB
^^^^^^^^^^^^^^^^^^^^^^^^

.. java:field:: public static float[] TORCHVISION_NORM_STD_RGB
   :outertype: TensorImageUtils

Methods
-------
bitmapToFloat32Tensor
^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor bitmapToFloat32Tensor(Bitmap bitmap, float[] normMeanRGB, float[] normStdRGB)
   :outertype: TensorImageUtils

   Creates a new \ :java:ref:`org.pytorch.Tensor`\  from a full \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std specified in the parameters.

   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

bitmapToFloat32Tensor
^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor bitmapToFloat32Tensor(Bitmap bitmap, int x, int y, int width, int height, float[] normMeanRGB, float[] normStdRGB)
   :outertype: TensorImageUtils

   Creates a new \ :java:ref:`org.pytorch.Tensor`\  from the specified area of an \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std specified in the parameters.

   :param bitmap: \ :java:ref:`android.graphics.Bitmap`\  as a source for Tensor data
   :param x: x coordinate of the top left corner of the bitmap's area
   :param y: y coordinate of the top left corner of the bitmap's area
   :param width: width of the bitmap's area
   :param height: height of the bitmap's area
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

bitmapToFloatBuffer
^^^^^^^^^^^^^^^^^^^

.. java:method:: public static void bitmapToFloatBuffer(Bitmap bitmap, int x, int y, int width, int height, float[] normMeanRGB, float[] normStdRGB, FloatBuffer outBuffer, int outBufferOffset)
   :outertype: TensorImageUtils

   Writes tensor content from the specified \ :java:ref:`android.graphics.Bitmap`\ , normalized with the mean and std specified in the parameters, to the specified \ :java:ref:`java.nio.FloatBuffer`\  with the specified offset.

   :param bitmap: \ :java:ref:`android.graphics.Bitmap`\  as a source for Tensor data
   :param x: x coordinate of the top left corner of the bitmap's area
   :param y: y coordinate of the top left corner of the bitmap's area
   :param width: width of the bitmap's area
   :param height: height of the bitmap's area
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

imageYUV420CenterCropToFloat32Tensor
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static Tensor imageYUV420CenterCropToFloat32Tensor(Image image, int rotateCWDegrees, int tensorWidth, int tensorHeight, float[] normMeanRGB, float[] normStdRGB)
   :outertype: TensorImageUtils

   Creates a new \ :java:ref:`org.pytorch.Tensor`\  from the specified area of an \ :java:ref:`android.media.Image`\ , doing optional rotation, scaling (nearest) and center cropping.

   :param image: \ :java:ref:`android.media.Image`\  as a source for Tensor data
   :param rotateCWDegrees: Clockwise angle through which the input image needs to be rotated to be upright. Range of valid values: 0, 90, 180, 270
   :param tensorWidth: return tensor width, must be positive
   :param tensorHeight: return tensor height, must be positive
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order

imageYUV420CenterCropToFloatBuffer
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. java:method:: public static void imageYUV420CenterCropToFloatBuffer(Image image, int rotateCWDegrees, int tensorWidth, int tensorHeight, float[] normMeanRGB, float[] normStdRGB, FloatBuffer outBuffer, int outBufferOffset)
   :outertype: TensorImageUtils

   Writes tensor content from the specified \ :java:ref:`android.media.Image`\ , doing optional rotation, scaling (nearest) and center cropping, to the specified \ :java:ref:`java.nio.FloatBuffer`\  with the specified offset.

   :param image: \ :java:ref:`android.media.Image`\  as a source for Tensor data
   :param rotateCWDegrees: Clockwise angle through which the input image needs to be rotated to be upright. Range of valid values: 0, 90, 180, 270
   :param tensorWidth: return tensor width, must be positive
   :param tensorHeight: return tensor height, must be positive
   :param normMeanRGB: means for RGB channels normalization, length must equal 3, RGB order
   :param normStdRGB: standard deviation for RGB channels normalization, length must equal 3, RGB order
   :param outBuffer: Output buffer, where tensor content will be written
   :param outBufferOffset: Output buffer offset with which tensor content will be written
@@ -1,18 +0,0 @@
org.pytorch
===========

.. java:package:: org.pytorch

.. toctree::
   :maxdepth: 1

   DType
   IValue
   Module
   Tensor
   Tensor-Tensor_float32
   Tensor-Tensor_float64
   Tensor-Tensor_int32
   Tensor-Tensor_int64
   Tensor-Tensor_int8
   Tensor-Tensor_uint8
@@ -1,9 +0,0 @@
org.pytorch.torchvision
=======================

.. java:package:: org.pytorch.torchvision

.. toctree::
   :maxdepth: 1

   TensorImageUtils
@@ -1,7 +0,0 @@
Javadoc
=======

.. toctree::
   :maxdepth: 2

   org/pytorch/package-index
@@ -42,6 +42,27 @@ The corresponding implementation is chosen automatically based on the PyTorch build mode.

Quantization-aware training (through :class:`~torch.quantization.FakeQuantize`) supports both CPU and CUDA.


.. note::

    When preparing a quantized model, it is necessary to ensure that the qconfig and the engine used for quantized computations match
    the backend on which the model will be executed. Quantization currently supports two backends: fbgemm (for use on x86,
    `<https://github.com/pytorch/FBGEMM>`_) and qnnpack (for use on ARM, `<https://github.com/pytorch/QNNPACK>`_).
    For example, if you are interested in quantizing a model to run on ARM, it is recommended to set the qconfig by calling:

    ``qconfig = torch.quantization.get_default_qconfig('qnnpack')``

    for post-training quantization and

    ``qconfig = torch.quantization.get_default_qat_qconfig('qnnpack')``

    for quantization-aware training.

    In addition, the ``torch.backends.quantized.engine`` parameter should be set to match the backend. To use qnnpack for inference, the
    backend is set to qnnpack as follows:

    ``torch.backends.quantized.engine = 'qnnpack'``
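As a minimal sketch of the workflow this note describes (the toy model and
calibration input below are placeholders, not part of the documented API)::

    import torch

    model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU()).eval()

    # qconfig and engine must both name the same backend (here: ARM/qnnpack).
    model.qconfig = torch.quantization.get_default_qconfig('qnnpack')
    torch.backends.quantized.engine = 'qnnpack'

    prepared = torch.quantization.prepare(model)       # insert observers
    prepared(torch.randn(1, 3, 32, 32))                # calibrate on sample data
    quantized = torch.quantization.convert(prepared)   # swap in quantized modules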
Quantized Tensors
---------------------------------------

@@ -111,7 +132,7 @@ Operations that are available from the ``torch`` namespace or as methods on Tensors

* :func:`~torch.quantize_per_tensor` - Convert float tensor to quantized tensor with per-tensor scale and zero point
* :func:`~torch.quantize_per_channel` - Convert float tensor to quantized tensor with per-channel scale and zero point
* View-based operations like :meth:`~torch.Tensor.view`, :meth:`~torch.Tensor.as_strided`, :meth:`~torch.Tensor.expand`, :meth:`~torch.Tensor.flatten`, :meth:`~torch.Tensor.slice`, python-style indexing, etc - work as on regular tensor (if quantization is not per-channel)
* View-based operations like :meth:`~torch.Tensor.view`, :meth:`~torch.Tensor.as_strided`, :meth:`~torch.Tensor.expand`, :meth:`~torch.Tensor.flatten`, :meth:`~torch.Tensor.select`, python-style indexing, etc - work as on regular tensor (if quantization is not per-channel)
* Comparators

  * :meth:`~torch.Tensor.ne` — Not equal
  * :meth:`~torch.Tensor.eq` — Equal

@@ -132,12 +153,24 @@ Operations that are available from the ``torch`` namespace or as methods on Tensors
* :meth:`~torch.Tensor.q_per_channel_scales` — Returns the scales of the per-channel quantized tensor
* :meth:`~torch.Tensor.q_per_channel_zero_points` — Returns the zero points of the per-channel quantized tensor
* :meth:`~torch.Tensor.q_per_channel_axis` — Returns the channel axis of the per-channel quantized tensor
* :meth:`~torch.Tensor.relu` — Rectified linear unit (copy)
* :meth:`~torch.Tensor.relu_` — Rectified linear unit (inplace)
* :meth:`~torch.Tensor.resize_` — In-place resize
* :meth:`~torch.Tensor.sort` — Sorts the tensor
* :meth:`~torch.Tensor.topk` — Returns k largest values of a tensor
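A short sketch making the conversion functions above concrete (standard
quantized-tensor API; the values are chosen arbitrarily)::

    import torch

    x = torch.tensor([-1.0, 0.0, 1.0, 2.0])
    q = torch.quantize_per_tensor(x, scale=0.1, zero_point=10, dtype=torch.quint8)

    q.q_scale()       # 0.1
    q.q_zero_point()  # 10
    q.int_repr()      # stored values: tensor([ 0, 10, 20, 30], dtype=torch.uint8)
    q.dequantize()    # back to float: tensor([-1., 0., 1., 2.])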
``torch.nn.functional``
~~~~~~~~~~~~~~~~~~~~~~~

Basic activations are supported.

* :meth:`~torch.nn.functional.relu` — Rectified linear unit (copy)
* :meth:`~torch.nn.functional.relu_` — Rectified linear unit (inplace)
* :meth:`~torch.nn.functional.max_pool2d` - Maximum pooling
* :meth:`~torch.nn.functional.adaptive_avg_pool2d` - Adaptive average pooling
* :meth:`~torch.nn.functional.avg_pool2d` - Average pooling
* :meth:`~torch.nn.functional.interpolate` - Interpolation
* :meth:`~torch.nn.functional.upsample` - Upsampling
* :meth:`~torch.nn.functional.upsample_bilinear` - Bilinear upsampling
* :meth:`~torch.nn.functional.upsample_nearest` - Nearest-neighbor upsampling

``torch.nn.intrinsic``
~~~~~~~~~~~~~~~~~~~~~~
@@ -432,7 +465,7 @@ Debugging utilities

.. autofunction:: get_observer_dict
.. autoclass:: RecordingObserver

torch.nn.instrinsic
torch.nn.intrinsic
--------------------------------

This module implements the combined (fused) modules conv + relu which can then be quantized.
@@ -546,6 +579,13 @@ Functional interface

.. autofunction:: conv2d
.. autofunction:: conv3d
.. autofunction:: max_pool2d
.. autofunction:: adaptive_avg_pool2d
.. autofunction:: avg_pool2d
.. autofunction:: interpolate
.. autofunction:: upsample
.. autofunction:: upsample_bilinear
.. autofunction:: upsample_nearest


.. automodule:: torch.nn.quantized
@@ -55,7 +55,7 @@ This library provides primitives allowing users to create and modify references

.. autofunction:: rpc_async
.. autofunction:: remote
.. autofunction:: get_worker_info
.. autofunction:: wait_all_workers
.. autofunction:: shutdown

Distributed Autograd Framework
------------------------------
@@ -1,131 +0,0 @@
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "batch_permutation_op.h"
#ifdef CAFFE2_USE_MKLDNN
#include <caffe2/ideep/operators/operator_fallback_ideep.h>
#include <caffe2/ideep/utils/ideep_operator.h>
#endif

namespace caffe2 {

#ifdef CAFFE2_USE_MKLDNN
REGISTER_IDEEP_OPERATOR(
    BatchPermutation,
    IDEEPFallbackOp<BatchPermutationOp<float, CPUContext>>);
#endif

REGISTER_CPU_OPERATOR(BatchPermutation, BatchPermutationOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(BatchPermutation)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Permute the batch elements of the input tensor X according to the permutation
specified in the input indices.

Warning: this op does not verify that indices is a valid permutation; gradient
computation is only correct if indices is a permutation.
)DOC")
    .Input(
        0,
        "X",
        "Tensor of at least 1D shape (N, D0, D1, ...).")
    .Input(
        1,
        "indices",
        "1D tensor of type int with shape (N, ) specifying a valid permutation "
        "of the indices in [0, N - 1] (inclusive).")
    .Output(
        0,
        "Y",
        "Tensor with the same shape as X where the (D0, D1, ...) dimensional "
        "batch elements of X are permuted according to the input indices.");

OPERATOR_SCHEMA(BatchPermutationGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .Input(
        0,
        "indices",
        "See BatchPermutation.")
    .Input(
        1,
        "dY",
        "Gradient of forward output 0 (Y).")
    .Output(
        0,
        "dX",
        "Gradient of forward input 0 (X).");

template <>
bool BatchPermutationOp<float, CPUContext>::RunOnDevice() {
  const auto& X = Input(0);
  const auto& indices = Input(1);

  CAFFE_ENFORCE_EQ(indices.dim(), 1, "indices must be 1-d");
  CAFFE_ENFORCE_EQ(
      X.dim32(0), indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  const int N = X.dim32(0);
  const int C = X.dim32(1);
  const int H = X.dim32(2);
  const int W = X.dim32(3);

  const float* src = X.template data<float>();
  float* dst = Y->template mutable_data<float>();

#ifdef _OPENMP
#if (_OPENMP >= 201307)
#pragma omp parallel for simd
#else
#pragma omp parallel for
#endif
#endif
  for (int i = 0; i < N; i++) {
    int idx = indices.template data<int>()[i];

    std::memcpy(dst + i * C * H * W, src + idx * C * H * W, sizeof(float) * C * H * W);
  }

  return true;
}

class GetBatchPermutationGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "BatchPermutationGradient",
        "",
        vector<string>{I(1), GO(0)},
        vector<string>{GI(0)});
  }
};

REGISTER_GRADIENT(BatchPermutation, GetBatchPermutationGradient);

} // namespace caffe2
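For reference, the semantics of the removed operator sketched in Python/NumPy
(this mirrors the memcpy gather loop in RunOnDevice above; the helper name is
ours, not part of Caffe2):

    import numpy as np

    def batch_permutation(X, indices):
        # Y[i] = X[indices[i]], exactly what the per-row memcpy computes.
        assert indices.ndim == 1 and indices.shape[0] == X.shape[0]
        return X[indices]

    X = np.arange(8, dtype=np.float32).reshape(4, 2)
    print(batch_permutation(X, np.array([2, 0, 3, 1])))  # rows of X reordered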
@@ -1,112 +0,0 @@
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "modules/detectron/batch_permutation_op.h"
#include "caffe2/core/context_gpu.h"

namespace caffe2 {

namespace {
template <bool forward>
__global__ void BatchPermutationKernel(
    int N,
    int C,
    int H,
    int W,
    const float* src,
    const int* indices,
    float* dst) {
  CUDA_1D_KERNEL_LOOP(index, N * C * H * W) {
    int w = index % W;
    int h = (index / W) % H;
    int c = (index / W / H) % C;
    int n = (index / W / H / C);
    int idx = indices[n];
    if (forward) {
      dst[n * C * H * W + c * H * W + h * W + w] =
          src[idx * C * H * W + c * H * W + h * W + w];
    } else {
      dst[idx * C * H * W + c * H * W + h * W + w] =
          src[n * C * H * W + c * H * W + h * W + w];
    }
  }
}
} // namespace

template <>
bool BatchPermutationOp<float, CUDAContext>::RunOnDevice() {
  auto& X = Input(0);
  auto& indices = Input(1);

  CAFFE_ENFORCE(indices.ndim() == 1, "indices must be 1-d");
  CAFFE_ENFORCE(
      X.dim32(0) == indices.dim32(0),
      "X.dim32(0) must be equal to indices.dim32(0)",
      "(",
      X.dim32(0),
      " vs. ",
      indices.dim32(0),
      ")");

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  BatchPermutationKernel<true><<<
      CAFFE_GET_BLOCKS(X.size()),
      CAFFE_CUDA_NUM_THREADS,
      0,
      context_.cuda_stream()>>>(
      X.dim32(0),
      X.dim32(1),
      X.dim32(2),
      X.dim32(3),
      X.data<float>(),
      indices.data<int>(),
      Y->mutable_data<float>());

  return true;
}

template <>
bool BatchPermutationGradientOp<float, CUDAContext>::RunOnDevice() {
  auto& indices = Input(0);
  auto& dY = Input(1);

  auto* dX = Output(0, dY.sizes(), at::dtype<float>());

  BatchPermutationKernel<false><<<
      CAFFE_GET_BLOCKS(dY.size()),
      CAFFE_CUDA_NUM_THREADS,
      0,
      context_.cuda_stream()>>>(
      dY.dim32(0),
      dY.dim32(1),
      dY.dim32(2),
      dY.dim32(3),
      dY.data<float>(),
      indices.data<int>(),
      dX->mutable_data<float>());

  return true;
}

REGISTER_CUDA_OPERATOR(
    BatchPermutation,
    BatchPermutationOp<float, CUDAContext>);
REGISTER_CUDA_OPERATOR(
    BatchPermutationGradient,
    BatchPermutationGradientOp<float, CUDAContext>);
} // namespace caffe2
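The `forward` template flag above switches between a gather and a scatter with
the same indices; for a true permutation these are mutual inverses, which is
why the gradient reuses the one kernel. A NumPy sketch of that identity (helper
names are ours):

    import numpy as np

    def gather(src, idx):   # forward:  dst[n] = src[idx[n]]
        return src[idx]

    def scatter(src, idx):  # backward: dst[idx[n]] = src[n]
        dst = np.empty_like(src)
        dst[idx] = src
        return dst

    x = np.random.randn(4, 3).astype(np.float32)
    idx = np.array([2, 0, 3, 1])
    assert np.array_equal(scatter(gather(x, idx), idx), x)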
@@ -1,53 +0,0 @@
/**
 * Copyright (c) 2016-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef BATCHPERMUTATION_OP_H_
#define BATCHPERMUTATION_OP_H_

#include <cstring>
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <typename T, class Context>
class BatchPermutationOp final : public Operator<Context> {
 public:
  BatchPermutationOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override;
};

template <typename T, class Context>
class BatchPermutationGradientOp final : public Operator<Context> {
 public:
  BatchPermutationGradientOp(const OperatorDef& def, Workspace* ws)
      : Operator<Context>(def, ws) {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    // No CPU implementation for now
    CAFFE_NOT_IMPLEMENTED;
  }
};

} // namespace caffe2

#endif // BATCHPERMUTATION_OP_H_
@@ -17,17 +17,23 @@ from torch._C import parse_schema
#
# Whitelist entries can be removed after the date listed on them passes.
white_list = [
    ('c10_experimental', datetime.date(2020, 1, 1)),
    ('_batch_norm_impl_index', datetime.date(2019, 11, 15)),
    ('_batch_norm_impl_index_backward', datetime.date(2019, 11, 15)),
    ('cudnn_batch_norm', datetime.date(2019, 11, 15)),
    ('cudnn_batch_norm_backward', datetime.date(2019, 11, 15)),
    ('_nnpack_spatial_convolution', datetime.date(2019, 11, 12)),
    ('_aten', datetime.date(2019, 12, 22)),
    ('_prim::ListConstruct', datetime.date(2019, 11, 22)),
    ('thnn_conv3d', datetime.date(9999, 1, 1)),
    ('thnn_conv3d.out', datetime.date(9999, 1, 1)),
    ('grad', datetime.date(2020, 1, 1)),
    ("aten::append", datetime.date(9999, 1, 1)),
    ("prim::AutogradAnyNonZero", datetime.date(9999, 1, 1)),
    ("aten::grad", datetime.date(9999, 1, 1)),
    ("_c10_experimental", datetime.date(9999, 1, 1)),
    ("aten::thnn_conv3d", datetime.date(9999, 1, 1)),
    ("aten::native_layer_norm_double_backward", datetime.date(9999, 1, 1)),
    ("aten::cudnn_batch_norm", datetime.date(9999, 1, 1)),
    ("aten::cudnn_batch_norm_backward", datetime.date(9999, 1, 1)),
    ("aten::_batch_norm_impl_index_backward", datetime.date(9999, 1, 1)),
    ("aten::empty_like", datetime.date(9999, 1, 1)),
    ("aten::_batch_norm_impl_index", datetime.date(9999, 1, 1)),
    ("aten::index_fill_", datetime.date(9999, 1, 1)),
    ("aten::index_fill", datetime.date(9999, 1, 1)),
    ("aten::log_softmax", datetime.date(9999, 1, 1)),
    ("aten::softmax", datetime.date(9999, 1, 1)),
    ("aten::thnn_conv3d_forward", datetime.date(9999, 1, 1)),
    ("aten::thnn_conv3d_backward.output_mask", datetime.date(9999, 1, 1)),
]
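
# For orientation, a plausible shape of the `white_listed` predicate used below
# (a hypothetical reconstruction, not the committed code; the real helper also
# honors the expiry dates attached to each entry):
#
#     def white_listed(schema, white_list):
#         today = datetime.date.today()
#         return any(name in str(schema) and expiry > today
#                    for name, expiry in white_list)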
@@ -43,6 +49,8 @@ def white_listed(schema, white_list):


def check_bc(new_schema_dict):
    existing_schemas = torch._C._jit_get_all_schemas()
    is_bc = True
    broken_ops = []
    for existing_schema in existing_schemas:
        if white_listed(existing_schema, white_list):
            print("skipping schema: ", str(existing_schema))
@@ -60,13 +68,17 @@ def check_bc(new_schema_dict):
                  .format(
                      str(existing_schema),
                      "\n\t".join(str(s) for s in new_schemas)))
            print('The PR is introducing backward incompatible changes to the '
                  'operator library. Please contact PyTorch team to confirm '
                  'whether this change is wanted or not.')
            # TODO Print out more details about why candidates don't match.
            return False
    print('Found backward compatible schemas for all existing schemas')
    return True
            broken_ops.append(str(existing_schema))
            is_bc = False
    if is_bc:
        print('Found backward compatible schemas for all existing schemas')
    else:
        print('The PR is introducing backward incompatible changes to the '
              'operator library. Please contact PyTorch team to confirm '
              'whether this change is wanted or not. \n Broken ops: [\n{}]'
              .format("\n".join(broken_ops)))
    return is_bc


if __name__ == '__main__':
@@ -903,6 +903,15 @@ class TestCase(expecttest.TestCase):
            # Don't put this in the try block; the AssertionError will catch it
            self.fail(msg="Did not raise when expected to")

    def assertNotWarn(self, callable, msg=''):
        r"""
        Test if :attr:`callable` does not raise a warning.
        """
        with self._reset_warning_registry(), warnings.catch_warnings(record=True) as ws:
            warnings.simplefilter("always")  # allow any warning to be raised
            callable()
            self.assertTrue(len(ws) == 0, msg)
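    # Example use of the new helper, as a sketch (the lambda wraps whatever
    # call is expected to stay silent; this mirrors how the dataloader tests
    # later in this diff call it):
    #
    #     it = iter(dataloader)
    #     self.assertNotWarn(lambda: next(it),
    #                        "Should not warn before exceeding length")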
    def assertWarns(self, callable, msg=''):
        r"""
        Test if :attr:`callable` raises a warning.
        """
@@ -145,7 +145,7 @@ TEST_F(ModuleTest, RegisterParameterUndefinedTensor) {
  {
    TestModel model;
    model.register_parameter("undefined_tensor", torch::Tensor(), /*requires_grad=*/false);
    ASSERT_FALSE(model.named_parameters()["undefined_tensor"].defined());
    ASSERT_EQ(model.parameters().size(), 0);
  }
  {
    std::stringstream buffer;
@@ -153,7 +153,7 @@ TEST_F(ModuleTest, RegisterParameterUndefinedTensor) {

    TestModel model;
    model.register_parameter("undefined_tensor", torch::Tensor());
    ASSERT_FALSE(model.named_parameters()["undefined_tensor"].defined());
    ASSERT_EQ(model.parameters().size(), 0);

    ASSERT_EQ(
        count_substr_occurrences(
@@ -221,6 +221,87 @@ TEST_F(ModuleTest, AsCastsModulesCorrectly) {
  ASSERT_EQ(unit.as<AGIUnit>(), &unit);
}

void test_DeviceOrDtypeConversionSkipsUndefinedTensor(
    torch::Device to_device, torch::Dtype to_dtype) {
  {
    // Case 1: Undefined tensors as parameters
    Linear module(LinearOptions(10, 20).bias(false));
    ASSERT_TRUE(module->weight.defined());
    ASSERT_FALSE(module->bias.defined());

    module->to(to_device);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.device().type(), to_device.type());
    ASSERT_FALSE(module->bias.defined());

    module->to(to_dtype);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.dtype(), to_dtype);
    ASSERT_FALSE(module->bias.defined());
  }
  {
    // Case 2: Undefined tensors as buffers
    BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(false).affine(true));
    ASSERT_TRUE(module->weight.defined());
    ASSERT_FALSE(module->running_mean.defined());

    module->to(to_device);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.device().type(), to_device.type());
    ASSERT_FALSE(module->running_mean.defined());

    module->to(to_dtype);
    ASSERT_TRUE(module->weight.defined());
    ASSERT_EQ(module->weight.dtype(), to_dtype);
    ASSERT_FALSE(module->running_mean.defined());
  }
}

TEST_F(ModuleTest, DeviceOrDtypeConversionSkipsUndefinedTensor) {
  test_DeviceOrDtypeConversionSkipsUndefinedTensor(torch::kCPU, torch::kDouble);
}

TEST_F(ModuleTest, DeviceOrDtypeConversionSkipsUndefinedTensor_CUDA) {
  test_DeviceOrDtypeConversionSkipsUndefinedTensor(torch::kCUDA, torch::kDouble);
}

TEST_F(ModuleTest, ParametersAndBuffersAccessorSkipsUndefinedTensor) {
  {
    Linear module(LinearOptions(10, 20).bias(false));

    auto params = module->parameters();
    ASSERT_EQ(params.size(), 1);
    auto named_params = module->named_parameters();
    ASSERT_EQ(named_params.size(), 1);

    ASSERT_TRUE(pointer_equal(params[0], named_params["weight"]));
    ASSERT_TRUE(pointer_equal(named_params["weight"], module->weight));
  }
  {
    BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(false).affine(false));

    auto buffers = module->buffers();
    ASSERT_EQ(buffers.size(), 0);
    auto named_buffers = module->named_buffers();
    ASSERT_EQ(named_buffers.size(), 0);
  }
  {
    BatchNorm1d module(BatchNorm1dOptions(5).track_running_stats(true).affine(false));

    auto buffers = module->buffers();
    ASSERT_EQ(buffers.size(), 3);
    auto named_buffers = module->named_buffers();
    ASSERT_EQ(named_buffers.size(), 3);

    ASSERT_TRUE(pointer_equal(buffers[0], named_buffers["running_mean"]));
    ASSERT_TRUE(pointer_equal(named_buffers["running_mean"], module->running_mean));
    ASSERT_TRUE(pointer_equal(buffers[1], named_buffers["running_var"]));
    ASSERT_TRUE(pointer_equal(named_buffers["running_var"], module->running_var));
    ASSERT_TRUE(pointer_equal(buffers[2], named_buffers["num_batches_tracked"]));
    ASSERT_TRUE(pointer_equal(named_buffers["num_batches_tracked"], module->num_batches_tracked));
  }
}

TEST_F(ModuleTest, Conversion_MultiCUDA) {
  Linear module(128, 64);
  for (auto& parameter : module->parameters()) {
@@ -46,7 +46,7 @@ private:
};

inline bool pointer_equal(at::Tensor first, at::Tensor second) {
  return first.data_ptr<float>() == second.data_ptr<float>();
  return first.data_ptr() == second.data_ptr();
}

inline int count_substr_occurrences(const std::string& str, const std::string& substr) {
@@ -1360,7 +1360,7 @@ class DistAutogradTest(RpcAgentTestFixture):
        # receive gradients from the node that received an error (and as a
        # result it didn't execute the rest of the graph).
        dist.barrier()
        rpc.wait_all_workers()
        rpc.shutdown()
        sys.exit(0)
@@ -72,6 +72,11 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):

    @wraps(old_test_method)
    def new_test_method(self, *arg, **kwargs):
        # Setting _ignore_rref_leak to make sure OwnerRRefs are properly deleted
        # in tests.
        import torch.distributed.rpc.api as api
        api._ignore_rref_leak = False

        self.worker_id = self.rank

        if setup_rpc:
@@ -83,7 +88,6 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):
            rpc.init_rpc(
                name="worker%d" % self.rank,
                backend=self.rpc_backend,
                init_method=self.init_method,
                rank=self.rank,
                world_size=self.world_size,
                rpc_backend_options=self.rpc_backend_options,
@@ -123,7 +127,7 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):
            # since we need to shutdown the RPC agent. If we don't shutdown the
            # RPC agent, tests would fail since RPC agent threads, locks and
            # condition variables are not properly terminated.
            rpc.wait_all_workers()
            rpc.shutdown()

        return return_value

@@ -134,6 +138,7 @@ def dist_init(old_test_method=None, setup_rpc=True, clean_shutdown=True):
TEST_CONFIG.rpc_backend_name = "PROCESS_GROUP"
TEST_CONFIG.build_rpc_backend_options = lambda test_object: rpc.backend_registry.construct_rpc_backend_options(
    test_object.rpc_backend,
    init_method=test_object.init_method,
    # Use enough 'num_send_recv_threads' until we fix https://github.com/pytorch/pytorch/issues/26359
    num_send_recv_threads=16,
)
@@ -6,7 +6,7 @@ import hypothesis
from hypothesis import assume
from hypothesis import strategies as st
from hypothesis.extra import numpy as stnp
from hypothesis.searchstrategy import SearchStrategy
from hypothesis.strategies import SearchStrategy

from common_quantized import _calculate_dynamic_qparams, _calculate_dynamic_per_channel_qparams

@@ -304,10 +304,11 @@ def tensor_conv(

    return X, W, b, groups

# Disable deadline testing if this version of hypothesis supports it, otherwise
# just return the original function
def no_deadline(fn):
    try:
        return hypothesis.settings(deadline=None)(fn)
    except hypothesis.errors.InvalidArgument:
        return fn
from hypothesis import settings
settings.register_profile("no_deadline", deadline=None)
settings.load_profile("no_deadline")

# This is really just to get flake8 to not complain when this file
# is imported purely for the side-effectful stuff above
def assert_deadline_disabled():
    assert settings().deadline is None
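# Downstream test files now opt in by importing this module for its side
# effect instead of decorating each test, exactly as the later hunks in this
# diff do:
#
#     import hypothesis_utils as hu
#     hu.assert_deadline_disabled()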
@@ -183,31 +183,6 @@ class TestONNXRuntime(unittest.TestCase):
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_googlenet_quantization(self):
        model = torchvision.models.quantization.googlenet(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_inception_quantization(self):
        model = torchvision.models.quantization.inception_v3(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_mobilenet_quantization(self):
        model = torchvision.models.quantization.mobilenet_v2(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_resnet_quantization(self):
        model = torchvision.models.quantization.resnet50(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,))

    def test_shufflenet_quantization(self):
        model = torchvision.models.quantization.shufflenet_v2_x1_0(pretrained=True)
        x = torch.randn(2, 3, 224, 224, requires_grad=True)
        self.run_test(model, (x,), rtol=1e-3, atol=1e-5)

    def test_r3d_18_video(self):
        model = torchvision.models.video.r3d_18(pretrained=True)
        x = torch.randn(1, 3, 4, 112, 112, requires_grad=True)
@@ -238,6 +213,55 @@ class TestONNXRuntime(unittest.TestCase):
        # Only support CPU version, since tracer is not working in GPU RNN.
        self.run_test(model, (x, model.hidden))

    def get_image_from_url(self, url):
        import sys
        import os
        if sys.version_info < (3,):
            from urlparse import urlsplit
            import urllib2
            request = urllib2
        else:
            from urllib.parse import urlsplit
            from urllib import request
        from PIL import Image
        from torchvision import transforms
        from torch._utils_internal import get_writable_path

        filename = os.path.basename(urlsplit(url)[2])
        data_dir = get_writable_path(os.path.join(os.path.dirname(__file__)))
        path = os.path.join(data_dir, filename)
        data = request.urlopen(url, timeout=15).read()
        with open(path, 'wb') as f:
            f.write(data)
        image = Image.open(path).convert("RGB")
        image = image.resize((300, 200), Image.BILINEAR)
        to_tensor = transforms.ToTensor()
        return to_tensor(image)

    def get_test_images(self):
        image_url = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg"
        image = self.get_image_from_url(url=image_url)
        images = [image]
        return images

    @skipIfUnsupportedMinOpsetVersion(11)
    def test_keypoint_rcnn(self):
        class KeyPointRCNN(torch.nn.Module):
            def __init__(self):
                super(KeyPointRCNN, self).__init__()
                self.model = torchvision.models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True,
                                                                                                  min_size=200,
                                                                                                  max_size=300)

            def forward(self, images):
                output = self.model(images)
                # TODO: The keypoints_scores require the use of Argmax that is updated in ONNX.
                # For now we are testing all the output of KeypointRCNN except keypoints_scores.
                # Enable when Argmax is updated in ONNX Runtime.
                return output[0]['boxes'], output[0]['labels'], output[0]['scores'], output[0]['keypoints']
        images = self.get_test_images()
        self.run_test(KeyPointRCNN(), (images,), rtol=1e-3, atol=1e-5)

    def test_word_language_model_RNN_TANH(self):
        self.run_word_language_model("RNN_TANH")
test/rpc_test.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import, division, print_function, unicode_literals

import concurrent.futures
from datetime import timedelta
import sys
import time
import unittest
from collections import namedtuple
from unittest import mock
@@ -18,6 +19,21 @@ from torch.distributed.rpc.api import _use_rpc_pickler
from torch.distributed.rpc.internal import PythonUDF, _internal_rpc_pickler
from rpc_agent_test_fixture import RpcAgentTestFixture

rpc_done = [False, False, False, False]

# TODO: dedupe this with the code in dist_autograd_test.py.
# Send rpc done info and context_id to
# dst_rank = (self.rank + rank_distance) % self.world_size
# we don't need a lock here since the GIL is held while executing remote
# python UDFs, so access is serialized across several workers.
def _set_rpc_done(rank_distance):
    global rpc_done
    rpc_done[rank_distance] = True

def _check_rpc_done(rank_distance):
    while not rpc_done[rank_distance]:
        # yield control to other threads
        time.sleep(0)

def requires_process_group_agent(message=""):
    def decorator(old_func):
@@ -127,7 +143,6 @@ def my_tensor_function(a, b):
    return a + b

def my_sleep_func(seconds=1):
    import time
    time.sleep(seconds)

@@ -306,7 +321,6 @@ class RpcTest(RpcAgentTestFixture):
        rpc.init_rpc(
            name="worker1",
            backend=backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
@@ -327,14 +341,13 @@ class RpcTest(RpcAgentTestFixture):
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )
        rpc.wait_all_workers()
        rpc.shutdown()

    @dist_init(setup_rpc=False)
    def test_reinit(self):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
@@ -357,13 +370,13 @@ class RpcTest(RpcAgentTestFixture):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )
        rpc.wait_all_workers()
        rpc.shutdown()

    @unittest.skip("test_invalid_names is flaky, see https://github.com/pytorch/pytorch/issues/25912")
    @dist_init(setup_rpc=False)
    def test_invalid_names(self):
        with self.assertRaisesRegex(RuntimeError, "Worker name must match"):
@@ -430,8 +443,8 @@ class RpcTest(RpcAgentTestFixture):

        from torch.distributed.rpc.api import _agent
        self.assertEqual(_agent, None)
        # wait_all_workers() should not do anything as _agent is None
        rpc.wait_all_workers()
        # shutdown() should not do anything as _agent is None
        rpc.shutdown()
        # We need this barrier here because although init_process_group is
        # blocking, it does not guarantee that all ranks are done with
        # initialization after the call. We did run into issues with it where
@@ -508,12 +521,11 @@ class RpcTest(RpcAgentTestFixture):
        self.assertEqual(ret, torch.ones(n, n) * 2)

    @dist_init(setup_rpc=False)
    def test_wait_all_workers(self):
    def test_shutdown(self):
        # Initialize RPC.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
@@ -527,7 +539,7 @@ class RpcTest(RpcAgentTestFixture):
            args=(torch.ones(n, n), torch.ones(n, n)),
        )
        self.assertEqual(ret, torch.ones(n, n) * 2)
        rpc.wait_all_workers()
        rpc.shutdown()

        with self.assertRaisesRegex(RuntimeError, "^RPC has not been initialized"):
            rpc.rpc_sync(
@@ -536,8 +548,8 @@ class RpcTest(RpcAgentTestFixture):
                args=(torch.ones(n, n), torch.ones(n, n)),
            )

        # it's safe to call wait_all_workers() multiple times
        rpc.wait_all_workers()
        # it's safe to call shutdown() multiple times
        rpc.shutdown()

    @dist_init
    def test_expected_src(self):
@@ -701,8 +713,6 @@ class RpcTest(RpcAgentTestFixture):
        self.assertEqual(ret, torch.ones(2, 2) + 1)

    def _stress_test_rpc(self, f, repeat=1000, args=()):
        import time

        n = self.rank + 1
        dst_rank = n % self.world_size
        futs = []
@@ -1090,6 +1100,111 @@ class RpcTest(RpcAgentTestFixture):

        self.assertEqual(result, sum(vals))

    def _test_rref_leak(self, ignore_leak=False):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )

        # This is for the below `dist.barrier`.
        # For `RpcAgent` other than `ProcessGroupAgent`,
        # no `_default_pg` is initialized.
        if not dist.is_initialized():
            dist.init_process_group(
                backend="gloo",
                init_method=self.init_method,
                rank=self.rank,
                world_size=self.world_size,
            )
        # Wait for all init to complete.
        dist.barrier()

        rref = rpc.remote(
            "worker{}".format((self.rank + 1) % self.world_size),
            torch.add,
            args=(torch.ones(2, 2), 1)
        )

        if ignore_leak:
            import torch.distributed.rpc.api as api
            api._ignore_rref_leak = True

        rpc.shutdown()

    @dist_init(setup_rpc=False)
    def test_rref_leak(self):
        with self.assertRaisesRegex(RuntimeError, "Leaking RRef"):
            self._test_rref_leak()

    @dist_init(setup_rpc=False)
    def test_ignore_rref_leak(self):
        self._test_rref_leak(ignore_leak=True)

    @dist_init(setup_rpc=False)
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
    def test_local_shutdown(self):
        # test that we can start RPC and then immediately locally shutdown
        # without sending any messages.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=rpc.backend_registry.BackendType[
                dist_utils.TEST_CONFIG.rpc_backend_name
            ],
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )
        # pass in graceful=False to ensure that we don't wait for other workers.
        rpc.shutdown(graceful=False)

    @dist_init(setup_rpc=False)
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
    def test_local_shutdown_with_rpc(self):
        # test that we can start RPC, send RPCs, and then run local shutdown.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=rpc.backend_registry.BackendType[
                dist_utils.TEST_CONFIG.rpc_backend_name
            ],
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options,
        )
        n = self.rank + 1
        dst_rank = n % self.world_size
        ret = rpc.rpc_sync(
            "worker{}".format(dst_rank),
            torch.add,
            args=(torch.ones(n, n), torch.ones(n, n)),
        )
        # wait for RPCs to be done, so that some workers don't try to shut down
        # too early.
        rpc.rpc_sync("worker{}".format(dst_rank), _set_rpc_done, args=(1,))
        _check_rpc_done(1)
        # pass in graceful=False to ensure that we don't wait for other workers.
        rpc.shutdown(graceful=False)

    @dist_init(setup_rpc=False)
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
    def test_wait_all_workers_and_shutdown(self):
        # This test ensures that both rpc._wait_all_workers() and rpc.shutdown() can be
        # called without errors being raised due to attempting to shut down
        # multiple times.
        rpc.init_rpc(
            name="worker%d" % self.rank,
            backend=rpc.backend_registry.BackendType[dist_utils.TEST_CONFIG.rpc_backend_name],
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=self.rpc_backend_options
        )
        from torch.distributed.rpc.api import _wait_all_workers
        # intentional call to internal _wait_all_workers.
        _wait_all_workers()
        rpc.shutdown()

    @dist_init(setup_rpc=False)
    def test_get_rpc_timeout(self):
        timeout = timedelta(seconds=1)
@@ -1102,14 +1217,13 @@ class RpcTest(RpcAgentTestFixture):
        rpc.init_rpc(
            name="worker{}".format(self.rank),
            backend=self.rpc_backend,
            init_method=self.init_method,
            rank=self.rank,
            world_size=self.world_size,
            rpc_backend_options=rpc_backend_options,
        )
        set_timeout = rpc.get_rpc_timeout()
        self.assertEqual(timeout, set_timeout)
        rpc.wait_all_workers()
        rpc.shutdown()

    @dist_init
    @requires_process_group_agent("PROCESS_GROUP rpc backend specific test, skip")
@@ -422,6 +422,9 @@ class WorkerSpecificIterableDataset(IterableDataset):
        assert worker_info is not None
        return iter(range(self.sizes_for_all_workers[worker_info.id]))

    def __len__(self):
        return sum(self.sizes_for_all_workers)


# Inspired by https://stackoverflow.com/a/26703365
# If all workers will call `sync_once`, they will be blocked until all workers
@@ -961,8 +964,8 @@ class TestDataLoader(TestCase):
            # non-batched should not convert ints into tensors
            self.assertIsInstance(d, torch._six.int_classes)
            self.assertEqual(d, i)
        with self.assertRaisesRegex(TypeError, "Cannot determine the DataLoader length of a IterableDataset"):
            len(dataloader)  # DataLoader with iterable-style dataset should error in __len__
        # DataLoader should match len of the iterable-style dataset (if implemented)
        self.assertEqual(len(dataloader), len(dataset))

        # [no auto-batching] multiprocessing loading
        num_workers = 3
@@ -978,8 +981,26 @@ class TestDataLoader(TestCase):
            # non-batched should not convert ints into tensors
            self.assertIsInstance(a, torch._six.int_classes)
            self.assertEqual(a, b)
        with self.assertRaisesRegex(TypeError, "Cannot determine the DataLoader length of a IterableDataset"):
            len(dataloader)  # DataLoader with iterable-style dataset should error in __len__
        # DataLoader should match len of the iterable-style dataset (if implemented)
        self.assertEqual(len(dataloader), len(dataset))
        # When loading more than len(dataset) data, after accessing len(dataloader),
        # we should get a warning. See NOTE [ IterableDataset and __len__ ].
        dataset = CountingIterableDataset(20)
        dataloader = DataLoader(dataset, num_workers=num_workers,
                                worker_init_fn=set_faulthander_if_available)
        it = iter(dataloader)
        for _ in range(40):
            self.assertNotWarn(lambda: next(it), "Should not warn before accessing len(dataloader)")
        self.assertEqual(len(dataloader), len(dataset))
        self.assertEqual(len(dataloader), 20)
        it = iter(dataloader)
        for _ in range(20):
            self.assertNotWarn(lambda: next(it), "Should not warn before exceeding length")
        for _ in range(3):
            self.assertWarnsRegex(
                lambda: next(it),
                r"but [0-9]+ samples have been fetched\. For multiprocessing data-loading, this",
                "Should always warn after exceeding length")

        # [no auto-batching] test that workers exit gracefully
        workers = dataloader_iter._workers
@@ -5,7 +5,7 @@ import numpy as np
from hypothesis import given
from hypothesis import strategies as st
import hypothesis_utils as hu
from hypothesis_utils import no_deadline
hu.assert_deadline_disabled()
from common_utils import run_tests, TestCase
from torch.quantization import FakeQuantize
from torch.quantization import default_observer, default_per_channel_weight_observer
@@ -64,10 +64,8 @@ NP_RANDOM_SEED = 19
tolerance = 1e-6

class TestFakeQuantizePerTensor(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
@@ -85,7 +83,7 @@ class TestFakeQuantizePerTensor(TestCase):
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
@@ -108,7 +106,8 @@ class TestFakeQuantizePerTensor(TestCase):
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu(), X.grad.cpu().detach().numpy(), rtol=tolerance, atol=tolerance)

    @no_deadline
    # https://github.com/pytorch/pytorch/issues/30604
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
@@ -127,7 +126,7 @@ class TestFakeQuantizePerTensor(TestCase):
            X, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=[torch.quint8])),
@@ -206,10 +205,8 @@ class TestFakeQuantizePerTensor(TestCase):


class TestFakeQuantizePerChannel(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
                                   qparams=hu.qparams(dtypes=torch.quint8)))
@@ -229,7 +226,7 @@ class TestFakeQuantizePerChannel(TestCase):
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
                                   qparams=hu.qparams(dtypes=torch.quint8)))
@@ -253,11 +250,10 @@ class TestFakeQuantizePerChannel(TestCase):
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu().detach().numpy(), X.grad.cpu().detach().numpy(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
                                   qparams=hu.qparams(dtypes=torch.quint8)))
    @unittest.skip("temporarily disable the test")
    def test_numerical_consistency_per_channel(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
@@ -275,7 +271,7 @@ class TestFakeQuantizePerChannel(TestCase):
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)

    @no_deadline
    @unittest.skip("temporarily disable the test")
    @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.per_channel_tensor(shapes=hu.array_shapes(2, 5,),
                                   qparams=hu.qparams(dtypes=torch.qint8)))
@@ -3564,6 +3564,38 @@ graph(%Ra, %Rb):
        self.assertTrue(type(block.paramNode()) == torch._C.Node)
        self.assertTrue(tested_blocks)

    def test_export_opnames(self):
        class Foo(torch.jit.ScriptModule):
            def __init__(self):
                super(Foo, self).__init__()

            def one(self, x, y):
                # type: (Tensor, Tensor) -> Tensor
                return x + y

            def two(self, x):
                # type: (Tensor) -> Tensor
                return 2 * x

            @torch.jit.script_method
            def forward(self, x):
                # type: (Tensor) -> Tensor
                return self.one(self.two(x), x)

        class Bar(torch.jit.ScriptModule):
            def __init__(self):
                super(Bar, self).__init__()
                self.sub = Foo()

            def forward(self, x):
                # type: (Tensor) -> Tensor
                return self.sub.forward(x)

        bar = Bar()
        ops = torch.jit.export_opnames(bar)
        expected = ['aten::add.Tensor', 'aten::mul.Scalar', 'prim::Constant']
        self.assertEqual(ops, expected)

    def test_pytorch_jit_env_off(self):
        import subprocess
        env = os.environ.copy()
@@ -7037,6 +7069,15 @@ a")
        self.checkScript(func1, (), optimize=True)
        self.checkScript(func2, (), optimize=True)

    # FIXME: get rid of this once we have actual ops using optional floats
    def test_optional_float(self):
        def _test_optional_float(x, scale):
            # type: (Tensor, Optional[float]) -> torch.Tensor
            return torch._test_optional_float(x, scale=scale)

        self.assertEqual([0], torch.jit.script(_test_optional_float)(torch.randn(()), None).shape)
        self.assertEqual((), torch.jit.script(_test_optional_float)(torch.randn(()), 2.5).shape)

    def _test_tensor_number_math(self, device='cpu'):
        template = dedent('''
        def func(t):
@@ -1038,6 +1038,11 @@ class TestNamedTensor(TestCase):
        self.assertEqual(out.names, ('N', 'C', 'H', 'W', 'K'))
        self.assertEqual(out.shape, (7, 2, 3, 5, 11))

        # takes negative positional dim
        out = tensor.unflatten(-2, (('C', 2), ('H', 3), ('W', 5)))
        self.assertEqual(out.names, ('N', 'C', 'H', 'W', 'K'))
        self.assertEqual(out.shape, (7, 2, 3, 5, 11))

        with self.assertRaisesRegex(RuntimeError, "don't multiply up to"):
            tensor.unflatten('D', (('H', 3), ('W', 5)))
|
@ -6232,6 +6232,38 @@ class TestNN(NNTestCase):
        inp = torch.randn(4, 5, device='cuda', requires_grad=True)
        gradgradcheck(F.pdist, (inp,))

    def test_cosine_embedding_loss_with_diff_type(self):
        for device in device_():
            input1 = torch.tensor([[2, 3, 4], [6, 2, 4]], dtype=torch.double, device=device)
            input2 = torch.tensor([[2, 3, 5], [3, 2, 1]], dtype=torch.double, device=device)
            target = torch.tensor([1, -1], dtype=torch.int, device=device)
            expected = torch.nn.functional.cosine_embedding_loss(input1, input2, target)
            for dt1 in torch.testing.get_all_math_dtypes(device):
                for dt2 in torch.testing.get_all_math_dtypes(device):
                    for dt3 in torch.testing.get_all_math_dtypes(device):
                        # dt3 is used as dtype for target = [1, -1], so let's skip unsigned type
                        if dt3 == torch.uint8:
                            continue
                        input1 = input1.to(dt1)
                        input2 = input2.to(dt2)
                        target = target.to(dt3)
                        result = torch.nn.functional.cosine_embedding_loss(input1, input2, target)
                        self.assertEqual(result.item(), expected.item(), 0.001)

    def test_kl_div_with_diff_type(self):
        for device in device_():
            input = torch.tensor([[2, 3, 5], [3, 2, 1]], dtype=torch.double, device=device)
            target = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.double, device=device)
            expected = torch.nn.functional.kl_div(input, target)
            for input_dtype in torch.testing.get_all_math_dtypes(device):
                for target_dtype in [torch.float32, torch.float64, torch.float16]:
                    if (torch.device(device).type == 'cpu' and target_dtype == torch.float16):
                        continue
                    input = input.to(input_dtype)
                    target = target.to(target_dtype)
                    result = torch.nn.functional.kl_div(input, target)
                    self.assertEqual(result.item(), expected.item(), 0.001)

    def test_cosine_embedding_loss_no_reduce(self):
        input1 = torch.randn(15, 10, requires_grad=True)
        input2 = torch.randn(15, 10, requires_grad=True)
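The expected values in the loss tests above come straight from the functional definition. For cosine_embedding_loss with the default margin of 0 and mean reduction, a quick manual check (an editor's sketch, not part of the test suite):

    import torch
    import torch.nn.functional as F

    x1, x2 = torch.randn(4, 8), torch.randn(4, 8)
    y = torch.tensor([1, -1, 1, -1])

    loss = F.cosine_embedding_loss(x1, x2, y)
    cos = F.cosine_similarity(x1, x2)
    # 1 - cos(x1, x2) where y == 1; max(0, cos(x1, x2) - margin) where y == -1
    manual = torch.where(y == 1, 1 - cos, cos.clamp(min=0)).mean()
    assert torch.allclose(loss, manual)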
@ -309,6 +309,30 @@ class TestNumbaIntegration(common.TestCase):
        torch_ary += 42
        self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary) + 42)

    @unittest.skipIf(not TEST_NUMPY, "No numpy")
    @unittest.skipIf(not TEST_CUDA, "No cuda")
    @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")
    def test_from_cuda_array_interface_inferred_strides(self):
        """torch.as_tensor(numba_ary) should have correct inferred (contiguous) strides"""
        # This could, in theory, be combined with test_from_cuda_array_interface but that test
        # is overly strict: it checks that the exported protocols are exactly the same, which
        # cannot handle differing exported protocol versions.
        dtypes = [
            numpy.float64,
            numpy.float32,
            numpy.int64,
            numpy.int32,
            numpy.int16,
            numpy.int8,
            numpy.uint8,
        ]
        for dtype in dtypes:
            numpy_ary = numpy.arange(6).reshape(2, 3).astype(dtype)  # note: no trailing comma, which would make this a tuple
            numba_ary = numba.cuda.to_device(numpy_ary)
            self.assertTrue(numba_ary.is_c_contiguous())
            torch_ary = torch.as_tensor(numba_ary, device="cuda")
            self.assertTrue(torch_ary.is_contiguous())

    @unittest.skipIf(not TEST_NUMPY, "No numpy")
    @unittest.skipIf(not TEST_CUDA, "No cuda")
    @unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")
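The zero-copy path this test exercises can be seen directly (assuming a CUDA build of PyTorch plus numba.cuda, the same preconditions the test skips on):

    import numpy
    import torch
    from numba import cuda

    numba_ary = cuda.to_device(numpy.arange(6).reshape(2, 3))
    torch_ary = torch.as_tensor(numba_ary, device="cuda")  # shares memory via __cuda_array_interface__
    torch_ary += 1  # mutates the same device buffer numba sees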
@ -11,15 +11,13 @@ import torch.backends.mkldnn
from common_utils import TestCase, run_tests
from hypothesis import given
from hypothesis import strategies as st
from hypothesis_utils import no_deadline
import hypothesis_utils as hu
hu.assert_deadline_disabled()
from functools import reduce


class IntrinsicQATModuleTest(TestCase):
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @given(batch_size=st.integers(2, 4),
           input_channels_per_group=st.sampled_from([2, 3, 4]),
           height=st.integers(5, 10),
@ -42,7 +42,8 @@ from jit_utils import get_forward

from hypothesis import given
from hypothesis import strategies as st
from hypothesis_utils import no_deadline
import hypothesis_utils as hu
hu.assert_deadline_disabled()
import io
import copy

@ -50,7 +51,6 @@ import copy
                     " Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs"
                     " with instruction set support avx2 or newer.")
class EagerModePostTrainingQuantTest(QuantizationTestCase):
    @no_deadline
    @given(qconfig=st.sampled_from((torch.quantization.default_qconfig, torch.quantization.default_per_channel_qconfig)))
    def test_single_layer(self, qconfig):
        r"""Quantize SingleLayerLinearModel which has one Linear module, make sure it is swapped

@ -919,7 +919,6 @@ class GraphModePostTrainingQuantTest(QuantizationTestCase):

class FunctionalModuleTest(QuantizationTestCase):
    # Histogram Observers are slow, so have no-deadline to ensure test doesn't time out
    @no_deadline
    @given(train_mode=st.booleans())
    def test_functional_module(self, train_mode):
        model = ModelWithFunctionals()

@ -1349,7 +1348,6 @@ class RecordHistogramObserverTest(QuantizationTestCase):
        self.assertEqual(len(observer_dict['fc1.module.activation_post_process'].get_tensor_value()), 2 * len(self.calib_data))
        self.assertEqual(observer_dict['fc1.module.activation_post_process'].get_tensor_value()[0], model(self.calib_data[0][0]))

    @no_deadline
    @given(qdtype=st.sampled_from((torch.qint8, torch.quint8)),
           qscheme=st.sampled_from((torch.per_tensor_affine, torch.per_tensor_symmetric)))
    def test_observer_scriptable(self, qdtype, qscheme):

@ -1366,7 +1364,6 @@ class RecordHistogramObserverTest(QuantizationTestCase):
        loaded = torch.jit.load(buf)
        self.assertTrue(torch.equal(obs.get_tensor_value()[0], loaded.get_tensor_value()[0]))

    @no_deadline
    @given(qdtype=st.sampled_from((torch.qint8, torch.quint8)),
           qscheme=st.sampled_from((torch.per_tensor_affine, torch.per_tensor_symmetric)),
           reduce_range=st.booleans())

@ -10,7 +10,7 @@ from hypothesis import settings, HealthCheck
from hypothesis import assume, given
from hypothesis import strategies as st
import hypothesis_utils as hu
from hypothesis_utils import no_deadline
hu.assert_deadline_disabled()

from common_utils import TEST_WITH_UBSAN, TestCase, run_tests, IS_PPC, IS_MACOS
from common_quantized import _quantize, _dequantize, _calculate_dynamic_qparams, \

@ -145,7 +145,6 @@ class TestQuantizedOps(TestCase):
                             message="{} relu failed".format(name))

    """Tests the correctness of the scalar addition."""
    @no_deadline
    @given(A=hu.tensor(shapes=hu.array_shapes(1, 4, 1, 5),
                       elements=st.floats(-1e6, 1e6, allow_nan=False),
                       qparams=hu.qparams()),

@ -506,7 +505,6 @@ class TestQuantizedOps(TestCase):
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=5, max_side=10),
                       qparams=hu.qparams(dtypes=torch.quint8)),

@ -556,7 +554,6 @@ class TestQuantizedOps(TestCase):
                         message=error_message.format(name + '.zero_point', scale,
                                                      qX_hat.q_zero_point()))

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=5, max_side=10),
                       qparams=hu.qparams(dtypes=torch.qint8)),

@ -619,7 +616,6 @@ class TestQuantizedOps(TestCase):
                         message=error_message.format(name + '.zero_point', scale,
                                                      X_hat.q_zero_point()))

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams(dtypes=torch.quint8)),

@ -662,7 +658,6 @@ class TestQuantizedOps(TestCase):
                                                      qX_hat.q_zero_point()))

    """Tests adaptive average pool operation on NHWC quantized tensors."""
    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams(dtypes=torch.qint8)),

@ -708,7 +703,6 @@ class TestQuantizedOps(TestCase):
                         message=error_message.format(name + '.zero_point', scale,
                                                      X_hat.q_zero_point()))

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),

@ -733,7 +727,6 @@ class TestQuantizedOps(TestCase):
        torch.testing.assert_allclose(quantized_out[0].dequantize(), unquantized_out[0])
        torch.testing.assert_allclose(quantized_out[1], unquantized_out[1])

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),

@ -818,7 +811,6 @@ class TestQuantizedOps(TestCase):
        cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale,
                         zero_point=zero_point)

    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=5, max_side=10),
                       qparams=hu.qparams()),

@ -874,7 +866,6 @@ class TestQuantizedOps(TestCase):
                                                      qX_hat.q_zero_point()))

    """Tests quantize concatenation (both fused and not)."""
    @no_deadline
    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=4, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),

@ -999,7 +990,6 @@ class TestQuantizedOps(TestCase):
                     " with instruction set support avx2 or newer.")
class TestDynamicQuantizedLinear(TestCase):
    """Tests the correctness of the dynamic quantized linear and linear_relu op."""
    @no_deadline
    @given(
        batch_size=st.integers(1, 4),
        input_channels=st.integers(16, 32),

@ -1112,7 +1102,6 @@ class TestDynamicQuantizedLinear(TestCase):
                         message="torch.ops.quantized.linear_dynamic (fbgemm) results are off")

    """Tests the correctness of the legacy dynamic quantized linear op."""
    @no_deadline
    @given(
        batch_size=st.integers(1, 4),
        input_channels=st.integers(16, 32),

@ -1189,7 +1178,6 @@ class TestDynamicQuantizedLinear(TestCase):

class TestQuantizedLinear(unittest.TestCase):
    """Tests the correctness of the quantized linear and linear_relu op."""
    @no_deadline
    @given(batch_size=st.integers(1, 4),
           input_channels=st.integers(16, 32),
           output_channels=st.integers(4, 8),

@ -13,7 +13,8 @@ from common_quantized import _calculate_dynamic_qparams, override_quantized_engi
from common_utils import run_tests, IS_PPC, TEST_WITH_UBSAN
from hypothesis import assume, given
from hypothesis import strategies as st
from hypothesis_utils import no_deadline
import hypothesis_utils as hu
hu.assert_deadline_disabled()

import io
import numpy as np

@ -127,7 +128,6 @@ class FunctionalAPITest(QuantizationTestCase):



    @no_deadline
    @given(batch_size=st.integers(1, 3),
           in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           H=st.integers(4, 16),

@ -181,7 +181,6 @@ class FunctionalAPITest(QuantizationTestCase):
                              W_scale, W_zero_point, Y_scale, Y_zero_point, use_bias,
                              use_channelwise)

    @no_deadline
    @given(batch_size=st.integers(1, 3),
           in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           D=st.integers(4, 8),

@ -239,7 +238,6 @@ class FunctionalAPITest(QuantizationTestCase):


class DynamicModuleAPITest(QuantizationTestCase):
    @no_deadline
    @unittest.skipUnless('fbgemm' in torch.backends.quantized.supported_engines,
                         " Quantized operations require FBGEMM. FBGEMM is only optimized for CPUs"
                         " with instruction set support avx2 or newer.")

@ -357,7 +355,6 @@ class ModuleAPITest(QuantizationTestCase):
                         message="ReLU6 module API failed")


    @no_deadline
    @given(
        batch_size=st.integers(1, 5),
        in_features=st.integers(16, 32),

@ -421,7 +418,6 @@ class ModuleAPITest(QuantizationTestCase):
        self.assertEqual(Z_ref, Z_q)

        # Test serialization of quantized Linear Module using state_dict

        model_dict = qlinear.state_dict()
        self.assertEqual(model_dict['_packed_params.weight'], W_q)
        if use_bias:

@ -647,7 +643,6 @@ class ModuleAPITest(QuantizationTestCase):
        # Smoke test extra_repr
        self.assertTrue(module_name in str(converted_qconv_module))

    @no_deadline
    @given(batch_size=st.integers(1, 3),
           in_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           H=st.integers(4, 16),
@ -763,6 +763,45 @@ class _TestTorchMixin(object):
        res = torch.where(a > 0)
        self.assertEqual(1, len(res))

    def test_where_tensor(self):
        def rand_tensor(size, dtype, device):
            if dtype.is_floating_point:
                return torch.rand(size=size, dtype=dtype, device=device)
            elif dtype == torch.uint8:
                return torch.randint(1, 5, size=size, dtype=dtype, device=device)
            elif dtype == torch.bool:
                return torch.randint(0, 1, size=size, dtype=dtype, device=device).bool()
            else:
                return torch.randint(-5, 5, size=size, dtype=dtype, device=device)

        def get_tensor(size, dtype, device, contiguous):
            if not contiguous and len(size) < 2:
                raise RuntimeError("Unable to generate non contiguous tensor with size < 2")
            t = rand_tensor(size, dtype, device)
            if contiguous:
                return t
            else:
                return t.transpose(0, 1)

        height = 5
        width = 5
        for device in torch.testing.get_all_device_types():
            for dt1 in torch.testing.get_all_math_dtypes(device):
                for dt2 in torch.testing.get_all_math_dtypes(device):
                    for contiguous in [True, False]:
                        x1 = get_tensor((height, width), dt1, device, contiguous)
                        x2 = get_tensor((height, width), dt2, device, contiguous)
                        if dt1 != dt2:
                            self.assertRaisesRegex(RuntimeError, "expected scalar type", lambda: torch.where(x1 == 1, x1, x2))
                        else:
                            if x1.is_floating_point():
                                condition = (x1 < 0.5)
                            else:
                                condition = (x1 == 1)
                            expected = condition.to(x1.dtype) * x1 + (~condition).to(x2.dtype) * x2
                            result = torch.where(condition, x1, x2)
                            self.assertEqual(expected, result)

    def test_all_any_with_dim(self):
        def test(x):
            r1 = x.prod(dim=0, keepdim=False).byte()
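The expected tensor in test_where_tensor encodes exactly what torch.where computes; a small standalone check:

    import torch

    x1 = torch.tensor([1., 2., 3.])
    x2 = torch.tensor([10., 20., 30.])
    cond = x1 < 2.5

    out = torch.where(cond, x1, x2)  # tensor([ 1.,  2., 30.])
    ref = cond.to(x1.dtype) * x1 + (~cond).to(x2.dtype) * x2
    assert torch.equal(out, ref)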
@ -1772,6 +1811,13 @@ class _TestTorchMixin(object):
        x = torch.tensor(2., requires_grad=True)
        self.assertRaises(Exception, lambda: y.addcmul(y, y, value=x))

    # FIXME: get rid of this once we have actual ops using optional floats
    def test_optional_floats(self):
        x = torch.randn(())
        self.assertEqual(torch._test_optional_float(x), torch.empty((0,)))
        self.assertEqual(torch._test_optional_float(x, scale=None), torch.empty((0,)))
        self.assertEqual(torch._test_optional_float(x, scale=2.5), torch.full((), 2.5))

    def test_copy_broadcast(self):
        torch.zeros(5, 6).copy_(torch.zeros(6))
        self.assertRaises(RuntimeError, lambda: torch.zeros(5, 6).copy_(torch.zeros(30)))
@ -13661,23 +13707,69 @@ class TestTorchDeviceType(TestCase):
        result = torch.cat(concat_list)
        self.assertEqual(result.size(0), SIZE1 + SIZE2)

# NOTE [Linspace+Logspace precision override]
# Our Linspace and logspace torch.half CUDA kernels are not very precise.
# Since linspace/logspace are deterministic, we can compute an expected
# amount of error (by testing without a precision override), adding a tiny
# amount (EPS) to that, and using that value as the override.
LINSPACE_LOGSPACE_EXTRA_EPS = 1e-5

# Tests that compare a device's computation with the (gold-standard) CPU's.
class TestDevicePrecision(TestCase):
    def test_linspace(self, device):
        a = torch.linspace(0, 10, 10, device=device)
        b = torch.linspace(0, 10, 10)

    # The implementation of linspace+logspace goes through a different path
    # when the steps arg is equal to 0 or 1. For other values of `steps`
    # they call specialized linspace (or logspace) kernels.
    LINSPACE_LOGSPACE_SPECIAL_STEPS = [0, 1]

    def _test_linspace(self, device, dtype, steps):
        a = torch.linspace(0, 10, steps=steps, dtype=dtype, device=device)
        b = torch.linspace(0, 10, steps=steps)
        self.assertEqual(a, b)

    @dtypes(torch.double)
    # See NOTE [Linspace+Logspace precision override]
    @precisionOverride({torch.half: 0.0039 + LINSPACE_LOGSPACE_EXTRA_EPS})
    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_linspace(self, device, dtype):
        self._test_linspace(device, dtype, steps=10)

    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_linspace_special_steps(self, device, dtype):
        for steps in self.LINSPACE_LOGSPACE_SPECIAL_STEPS:
            self._test_linspace(device, dtype, steps=steps)

    def _test_logspace(self, device, dtype, steps):
        a = torch.logspace(1, 1.1, steps=steps, dtype=dtype, device=device)
        b = torch.logspace(1, 1.1, steps=steps)
        self.assertEqual(a, b)

    def _test_logspace_base2(self, device, dtype, steps):
        a = torch.logspace(1, 1.1, steps=steps, base=2, dtype=dtype, device=device)
        b = torch.logspace(1, 1.1, steps=steps, base=2)
        self.assertEqual(a, b)

    # See NOTE [Linspace+Logspace precision override]
    @precisionOverride({torch.half: 0.0157 + LINSPACE_LOGSPACE_EXTRA_EPS})
    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_logspace(self, device, dtype):
        a = torch.logspace(1, 10, 10, dtype=dtype, device=device)
        b = torch.logspace(1, 10, 10, dtype=dtype, device='cpu')
        self.assertEqual(a, b)
        self._test_logspace(device, dtype, steps=10)

        # Check non-default base=2
        a = torch.logspace(1, 10, 10, 2, dtype=dtype, device=device)
        b = torch.logspace(1, 10, 10, 2, dtype=dtype, device='cpu')
        self.assertEqual(a, b)
    # See NOTE [Linspace+Logspace precision override]
    @precisionOverride({torch.half: 0.00201 + LINSPACE_LOGSPACE_EXTRA_EPS})
    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_logspace_base2(self, device, dtype):
        self._test_logspace_base2(device, dtype, steps=10)

    @dtypesIfCUDA(torch.half, torch.float, torch.double)
    @dtypes(torch.float, torch.double)
    def test_logspace_special_steps(self, device, dtype):
        for steps in self.LINSPACE_LOGSPACE_SPECIAL_STEPS:
            self._test_logspace(device, dtype, steps=steps)
            self._test_logspace_base2(device, dtype, steps=steps)

    # Note: ROCm fails when using float tensors
    @dtypes(torch.double)
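The override values above (0.0039, 0.0157, 0.00201) were measured empirically, as the NOTE describes. A sketch of how one would reproduce the linspace measurement on a CUDA machine:

    import torch

    if torch.cuda.is_available():
        a = torch.linspace(0, 10, 10, dtype=torch.half, device='cuda')
        b = torch.linspace(0, 10, 10, dtype=torch.double)
        print((a.double().cpu() - b).abs().max().item())  # roughly the 0.0039 used above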
Submodule third_party/fbgemm updated: 6aaaa4754f...399ea148f1
@ -328,6 +328,7 @@ def create_python_bindings(python_functions, has_self, is_module=False):
        'c10::optional<Scalar>': 'scalarOptional',
        'c10::optional<int64_t>': 'toInt64Optional',
        'c10::optional<bool>': 'toBoolOptional',
        'c10::optional<double>': 'toDoubleOptional',
        'IntArrayRef': 'intlist',
        'int64_t': 'toInt64',
        'bool': 'toBool',

@ -63,6 +63,7 @@ TYPE_MAP = {
    'int64_t': 'int',
    'int64_t?': 'int?',
    'double': 'float',
    'double?': 'float?',
    'bool': 'bool',
    'bool?': 'bool?',
    'Generator': 'Generator?',

@ -115,6 +116,7 @@ FROM_IVALUE = {
    'bool': '{}.toBool()',
    'bool?': '{}.toOptional<bool>()',
    'double': '{}.toDouble()',
    'double?': '{}.toOptional<double>()',
    'int64_t': '{}.toInt()',
    'int64_t?': '{}.toOptional<int64_t>()',
    'std::string': '{}.toStringRef()',
@ -3751,25 +3751,37 @@ add_docstr(torch.nonzero,
           r"""
nonzero(input, *, out=None, as_tuple=False) -> LongTensor or tuple of LongTensors

**When** :attr:`as_tuple` **is false or unspecified:**
.. note::
    :func:`torch.nonzero(..., as_tuple=False) <torch.nonzero>` (default) returns a
    2-D tensor where each row is the index for a nonzero value.

    :func:`torch.nonzero(..., as_tuple=True) <torch.nonzero>` returns a tuple of 1-D
    index tensors, allowing for advanced indexing, so ``x[x.nonzero(as_tuple=True)]``
    gives all nonzero values of tensor ``x``. Of the returned tuple, each index tensor
    contains nonzero indices for a certain dimension.

    See below for more details on the two behaviors.


**When** :attr:`as_tuple` **is ``False`` (default)**:

Returns a tensor containing the indices of all non-zero elements of
:attr:`input`. Each row in the result contains the indices of a non-zero
element in :attr:`input`. The result is sorted lexicographically, with
the last index changing the fastest (C-style).

If :attr:`input` has `n` dimensions, then the resulting indices tensor
If :attr:`input` has :math:`n` dimensions, then the resulting indices tensor
:attr:`out` is of size :math:`(z \times n)`, where :math:`z` is the total number of
non-zero elements in the :attr:`input` tensor.

**When** :attr:`as_tuple` **is true:**
**When** :attr:`as_tuple` **is ``True``**:

Returns a tuple of 1-D tensors, one for each dimension in :attr:`input`,
each containing the indices (in that dimension) of all non-zero elements of
:attr:`input` .

If :attr:`input` has `n` dimensions, then the resulting tuple contains `n` tensors
of size `z`, where `z` is the total number of
If :attr:`input` has :math:`n` dimensions, then the resulting tuple contains :math:`n`
tensors of size :math:`z`, where :math:`z` is the total number of
non-zero elements in the :attr:`input` tensor.

As a special case, when :attr:`input` has zero dimensions and a nonzero scalar

@ -3780,8 +3792,8 @@ Args:
    out (LongTensor, optional): the output tensor containing indices

Returns:
    LongTensor or tuple of LongTensor: If :attr:`as_tuple` is false, the output
    tensor containing indices. If :attr:`as_tuple` is true, one 1-D tensor for
    LongTensor or tuple of LongTensor: If :attr:`as_tuple` is ``False``, the output
    tensor containing indices. If :attr:`as_tuple` is ``True``, one 1-D tensor for
    each dimension, containing the indices of each nonzero element along that
    dimension.
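A compact demonstration of the two behaviors the rewritten docstring describes:

    import torch

    x = torch.tensor([[0.6, 0.0],
                      [0.0, 0.4]])
    x.nonzero()                     # tensor([[0, 0], [1, 1]]) -- one row per nonzero element
    idx = x.nonzero(as_tuple=True)  # (tensor([0, 1]), tensor([0, 1]))
    x[idx]                          # tensor([0.6000, 0.4000]) -- advanced indexing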
@ -5199,6 +5211,9 @@ i.e., if the last two dimensions of :attr:`input` are ``m`` and ``n``, then the
If :attr:`compute_uv` is ``False``, the returned `U` and `V` matrices will be zero matrices
of shape :math:`(m \times m)` and :math:`(n \times n)` respectively. :attr:`some` will be ignored here.

.. note:: The singular values are returned in descending order. If :attr:`input` is a batch of matrices,
          then the singular values of each matrix in the batch are returned in descending order.

.. note:: The implementation of SVD on CPU uses the LAPACK routine `?gesdd` (a divide-and-conquer
          algorithm) instead of `?gesvd` for speed. Analogously, the SVD on GPU uses the MAGMA routine
          `gesdd` as well.

@ -5279,6 +5294,9 @@ only the upper triangular portion is used by default.

If :attr:`upper` is ``False``, then lower triangular portion is used.

.. note:: The eigenvalues are returned in ascending order. If :attr:`input` is a batch of matrices,
          then the eigenvalues of each matrix in the batch are returned in ascending order.

.. note:: Irrespective of the original strides, the returned matrix `V` will
          be transposed, i.e. with strides `V.contiguous().transpose(-1, -2).stride()`.
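The ordering guarantee the new note documents is easy to verify (a sketch; torch.svd returns the (U, S, V) triple):

    import torch

    u, s, v = torch.svd(torch.randn(5, 3))
    assert torch.all(s[:-1] >= s[1:])  # singular values arrive in descending order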
@ -5782,7 +5800,7 @@ The upper triangular part of the matrix is defined as the elements on and
above the diagonal.

The argument :attr:`diagonal` controls which diagonal to consider. If
:attr:`diagonal` = 0, all elements on and below the main diagonal are
:attr:`diagonal` = 0, all elements on and above the main diagonal are
retained. A positive value excludes just as many diagonals above the main
diagonal, and similarly a negative value includes just as many diagonals below
the main diagonal. The main diagonal is the set of indices
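The corrected sentence is easiest to see with an all-ones matrix:

    import torch

    a = torch.ones(4, 4)
    torch.triu(a, diagonal=0)   # keeps the main diagonal and everything above it
    torch.triu(a, diagonal=1)   # excludes the main diagonal as well
    torch.triu(a, diagonal=-1)  # additionally keeps the first diagonal below the main one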
@ -22,6 +22,8 @@ namespace datasets {
template <typename ExampleType_, typename ChunkType_ = std::vector<ExampleType_>>
class ChunkDataReader {
 public:
  virtual ~ChunkDataReader() = default;

  using ChunkType = ChunkType_;
  using ExampleType = ExampleType_;
@ -47,7 +47,7 @@ class Cloneable : public virtual Module {
        "parameters as the original module after calling reset(). "
        "Are you sure you called register_parameter() inside reset() "
        "and not the constructor?");
    for (const auto& parameter : parameters_) {
    for (const auto& parameter : named_parameters(/*recurse=*/false)) {
      auto& tensor = *parameter;
      auto data = device && tensor.device() != *device ?
          tensor.to(*device) : autograd::Variable(tensor).clone();

@ -59,7 +59,7 @@ class Cloneable : public virtual Module {
        "buffers as the original module after calling reset(). "
        "Are you sure you called register_buffer() inside reset() "
        "and not the constructor?");
    for (const auto& buffer : buffers_) {
    for (const auto& buffer : named_buffers(/*recurse=*/false)) {
      auto& tensor = *buffer;
      auto data = device && tensor.device() != *device ?
          tensor.to(*device) : autograd::Variable(tensor).clone();

@ -648,11 +648,11 @@ void Module::to_impl(Ts&&... ts) {
    child.value()->to(ts...);
  }
  // Then move every parameter to the new dtype/device.
  for (auto& parameter : parameters_) {
  for (auto& parameter : named_parameters(/*recurse=*/false)) {
    parameter->set_data(autograd::Variable(*parameter).to(ts...));
  }
  // Then move every buffer to the new dtype/device.
  for (auto& buffer : buffers_) {
  for (auto& buffer : named_buffers(/*recurse=*/false)) {
    buffer->set_data(autograd::Variable(*buffer).to(ts...));
  }
}
@ -9,8 +9,6 @@

#include <cstdint>

namespace F = torch::nn::functional;

namespace torch {
namespace nn {

@ -178,7 +176,7 @@ class BatchNormImplBase : public NormImplBase<D, Derived, BatchNormOptions> {
    }
  }

  return F::detail::batch_norm(
  return torch::nn::functional::detail::batch_norm(
      input,
      this->running_mean,
      this->running_var,
@ -17,9 +17,9 @@ namespace nn {

/// Base class for all (dimension-specialized) convolution modules.
template <size_t D, typename Derived>
class ConvImpl : public torch::nn::Cloneable<Derived> {
class ConvNdImpl : public torch::nn::Cloneable<Derived> {
 public:
  explicit ConvImpl(ConvOptions<D> options_) : options(std::move(options_)) {
  explicit ConvNdImpl(detail::ConvNdOptions<D> options_) : options(std::move(options_)) {
    reset();
  }

@ -98,7 +98,7 @@ class ConvImpl : public torch::nn::Cloneable<Derived> {
  }

  /// The options with which this `Module` was constructed.
  ConvOptions<D> options;
  detail::ConvNdOptions<D> options;

  /// The learned kernel (or "weight").
  Tensor weight;

@ -112,15 +112,15 @@ class ConvImpl : public torch::nn::Cloneable<Derived> {
/// Applies convolution over a 1-D input.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv1d to learn about
/// the exact behavior of this module.
class TORCH_API Conv1dImpl : public ConvImpl<1, Conv1dImpl> {
class TORCH_API Conv1dImpl : public ConvNdImpl<1, Conv1dImpl> {
 public:
  Conv1dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<1> kernel_size)
      : Conv1dImpl(ConvOptions<1>(input_channels, output_channels, kernel_size)) {
      : Conv1dImpl(Conv1dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit Conv1dImpl(ConvOptions<1> options_);
  explicit Conv1dImpl(Conv1dOptions options_);
  Tensor forward(const Tensor& input);
};

@ -135,15 +135,15 @@ TORCH_MODULE(Conv1d);
/// Applies convolution over a 2-D input.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv2d to learn about
/// the exact behavior of this module.
class TORCH_API Conv2dImpl : public ConvImpl<2, Conv2dImpl> {
class TORCH_API Conv2dImpl : public ConvNdImpl<2, Conv2dImpl> {
 public:
  Conv2dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<2> kernel_size)
      : Conv2dImpl(ConvOptions<2>(input_channels, output_channels, kernel_size)) {
      : Conv2dImpl(Conv2dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit Conv2dImpl(ConvOptions<2> options_);
  explicit Conv2dImpl(Conv2dOptions options_);
  Tensor forward(const Tensor& input);
};

@ -158,15 +158,15 @@ TORCH_MODULE(Conv2d);
/// Applies convolution over a 3-D input.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.Conv3d to learn about
/// the exact behavior of this module.
class TORCH_API Conv3dImpl : public ConvImpl<3, Conv3dImpl> {
class TORCH_API Conv3dImpl : public ConvNdImpl<3, Conv3dImpl> {
 public:
  Conv3dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<3> kernel_size)
      : Conv3dImpl(ConvOptions<3>(input_channels, output_channels, kernel_size)) {
      : Conv3dImpl(Conv3dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit Conv3dImpl(ConvOptions<3> options_);
  explicit Conv3dImpl(Conv3dOptions options_);
  Tensor forward(const Tensor& input);
};

@ -180,9 +180,9 @@ TORCH_MODULE(Conv3d);

/// Base class for all (dimension-specialized) convolution transpose modules.
template <size_t D, typename Derived>
class ConvTransposeImpl : public ConvImpl<D, Derived> {
class ConvTransposeNdImpl : public ConvNdImpl<D, Derived> {
 public:
  using torch::nn::ConvImpl<D, Derived>::ConvImpl;
  using torch::nn::ConvNdImpl<D, Derived>::ConvNdImpl;

  /// Pretty prints the `ConvTranspose{1,2,3}d` module into the given `stream`.
  void pretty_print(std::ostream& stream) const override {

@ -224,15 +224,15 @@ class ConvTransposeImpl : public ConvImpl<D, Derived> {
/// Applies the ConvTranspose1d function.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose1d to
/// learn about the exact behavior of this module.
class TORCH_API ConvTranspose1dImpl : public ConvTransposeImpl<1, ConvTranspose1dImpl> {
class TORCH_API ConvTranspose1dImpl : public ConvTransposeNdImpl<1, ConvTranspose1dImpl> {
 public:
  ConvTranspose1dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<1> kernel_size)
      : ConvTranspose1dImpl(ConvTransposeOptions<1>(input_channels, output_channels, kernel_size)) {
      : ConvTranspose1dImpl(ConvTranspose1dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit ConvTranspose1dImpl(ConvTransposeOptions<1> options_);
  explicit ConvTranspose1dImpl(ConvTranspose1dOptions options_);
  Tensor forward(const Tensor& input,
                 const c10::optional<at::IntArrayRef>& output_size = c10::nullopt);
};

@ -244,15 +244,15 @@ TORCH_MODULE(ConvTranspose1d);
/// Applies the ConvTranspose2d function.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose2d to
/// learn about the exact behavior of this module.
class TORCH_API ConvTranspose2dImpl : public ConvTransposeImpl<2, ConvTranspose2dImpl> {
class TORCH_API ConvTranspose2dImpl : public ConvTransposeNdImpl<2, ConvTranspose2dImpl> {
 public:
  ConvTranspose2dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<2> kernel_size)
      : ConvTranspose2dImpl(ConvTransposeOptions<2>(input_channels, output_channels, kernel_size)) {
      : ConvTranspose2dImpl(ConvTranspose2dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit ConvTranspose2dImpl(ConvTransposeOptions<2> options_);
  explicit ConvTranspose2dImpl(ConvTranspose2dOptions options_);
  Tensor forward(const Tensor& input,
                 const c10::optional<at::IntArrayRef>& output_size = c10::nullopt);
};

@ -264,15 +264,15 @@ TORCH_MODULE(ConvTranspose2d);
/// Applies the ConvTranspose3d function.
/// See https://pytorch.org/docs/master/nn.html#torch.nn.ConvTranspose3d to
/// learn about the exact behavior of this module.
class TORCH_API ConvTranspose3dImpl : public ConvTransposeImpl<3, ConvTranspose3dImpl> {
class TORCH_API ConvTranspose3dImpl : public ConvTransposeNdImpl<3, ConvTranspose3dImpl> {
 public:
  ConvTranspose3dImpl(
      int64_t input_channels,
      int64_t output_channels,
      ExpandingArray<3> kernel_size)
      : ConvTranspose3dImpl(ConvTransposeOptions<3>(input_channels, output_channels, kernel_size)) {
      : ConvTranspose3dImpl(ConvTranspose3dOptions(input_channels, output_channels, kernel_size)) {
  }
  explicit ConvTranspose3dImpl(ConvTransposeOptions<3> options_);
  explicit ConvTranspose3dImpl(ConvTranspose3dOptions options_);
  Tensor forward(const Tensor& input,
                 const c10::optional<at::IntArrayRef>& output_size = c10::nullopt);
};
@ -14,7 +14,7 @@ class InstanceNormImpl : public torch::nn::NormImplBase<D, Derived, InstanceNorm

  Tensor forward(const Tensor& input) {
    this->_check_input_dim(input);
    return F::detail::instance_norm(
    return torch::nn::functional::detail::instance_norm(
        input, this->running_mean, this->running_var, this->weight, this->bias,
        this->is_training() || !this->options.track_running_stats(), this->options.momentum(), this->options.eps());
  }
@ -9,12 +9,14 @@
namespace torch {
namespace nn {

/// Options for a `D`-dimensional convolution module.
template <size_t D>
struct ConvOptions {
  typedef c10::variant<enumtype::kZeros, enumtype::kCircular> padding_mode_t;
namespace detail {

  ConvOptions(
typedef c10::variant<enumtype::kZeros, enumtype::kCircular> conv_padding_mode_t;

/// Options for a `D`-dimensional convolution or convolution transpose module.
template <size_t D>
struct ConvNdOptions {
  ConvNdOptions(
      int64_t in_channels,
      int64_t out_channels,
      ExpandingArray<D> kernel_size) :

@ -73,6 +75,67 @@ struct ConvOptions {
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(bool, bias) = true;

  /// Accepted values are `zeros` and `circular`. Default: `zeros`
  TORCH_ARG(conv_padding_mode_t, padding_mode) = torch::kZeros;
};

} // namespace detail

// ============================================================================

/// Options for a `D`-dimensional convolution module.
template <size_t D>
struct ConvOptions {
  using padding_mode_t = detail::conv_padding_mode_t;

  ConvOptions(
      int64_t in_channels,
      int64_t out_channels,
      ExpandingArray<D> kernel_size) :
          in_channels_(in_channels),
          out_channels_(out_channels),
          kernel_size_(std::move(kernel_size)) {}

  /// The number of channels the input volumes will have.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, in_channels);

  /// The number of output channels the convolution should produce.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, out_channels);

  /// The kernel size to use.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, kernel_size);

  /// The stride of the convolution.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, stride) = 1;

  /// The padding to add to the input volumes.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, padding) = 0;

  /// The kernel dilation.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, dilation) = 1;

  /// The number of convolution groups.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(int64_t, groups) = 1;

  /// Whether to add a bias after individual applications of the kernel.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(bool, bias) = true;

  /// Accepted values are `zeros` and `circular`. Default: `zeros`
  TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros;
};

@ -129,8 +192,67 @@ using Conv3dFuncOptions = ConvFuncOptions<3>;

// ============================================================================

template<size_t D>
using ConvTransposeOptions = ConvOptions<D>;
template <size_t D>
struct ConvTransposeOptions {
  using padding_mode_t = detail::conv_padding_mode_t;

  ConvTransposeOptions(
      int64_t in_channels,
      int64_t out_channels,
      ExpandingArray<D> kernel_size) :
          in_channels_(in_channels),
          out_channels_(out_channels),
          kernel_size_(std::move(kernel_size)) {}

  /// The number of channels the input volumes will have.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, in_channels);

  /// The number of output channels the convolution should produce.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(int64_t, out_channels);

  /// The kernel size to use.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, kernel_size);

  /// The stride of the convolution.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, stride) = 1;

  /// The padding to add to the input volumes.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, padding) = 0;

  /// For transpose convolutions, the padding to add to output volumes.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, output_padding) = 0;

  /// The number of convolution groups.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(int64_t, groups) = 1;

  /// Whether to add a bias after individual applications of the kernel.
  /// Changing this parameter after construction __has no effect__.
  TORCH_ARG(bool, bias) = true;

  /// The kernel dilation.
  /// For a `D`-dim convolution, must be a single number or a list of `D`
  /// numbers.
  /// This parameter __can__ be changed after construction.
  TORCH_ARG(ExpandingArray<D>, dilation) = 1;

  /// Accepted values are `zeros` and `circular`. Default: `zeros`
  TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros;
};

/// `ConvTransposeOptions` specialized for 1-D convolution.
using ConvTranspose1dOptions = ConvTransposeOptions<1>;
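The padding_mode option carried through ConvNdOptions above has a direct Python-side counterpart; 'zeros' and 'circular' are the accepted values in this release:

    import torch

    conv = torch.nn.Conv2d(3, 16, kernel_size=3, padding=1, padding_mode='circular')
    y = conv(torch.randn(1, 3, 8, 8))
    print(y.shape)  # torch.Size([1, 16, 8, 8])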
@ -100,7 +100,7 @@ void replicate_grad_edges(
    const std::vector<std::shared_ptr<ModuleType>>& replicas,
    const std::vector<Device>& devices) {

  for (auto& parameter : module->parameters_) {
  for (auto& parameter : module->named_parameters(/*recurse=*/false)) {
    auto grad_fn = std::make_shared<ReduceAdd>((*parameter).device());
    grad_fn->set_next_edges(autograd::collect_next_edges(*parameter));

@ -109,7 +109,7 @@ void replicate_grad_edges(
    }
  }

  for (auto& buffer : module->buffers_) {
  for (auto& buffer : module->named_buffers(/*recurse=*/false)) {
    if (buffer.value().requires_grad()){
      auto grad_fn = std::make_shared<ReduceAdd>((*buffer).device());
      grad_fn->set_next_edges(autograd::collect_next_edges(*buffer));
@ -32,15 +32,6 @@ std::string join_name(const std::string& name_prefix, const std::string& name) {
  full_name += name;
  return full_name;
}

void extend(
    std::vector<Tensor>& vector,
    const OrderedDict<std::string, Tensor>& dict) {
  vector.reserve(vector.size() + dict.size());
  for (const auto& item : dict) {
    vector.push_back(item.value());
  }
}
} // namespace

Module::Module()

@ -141,46 +132,48 @@ void Module::apply(
}

std::vector<Tensor> Module::parameters(bool recurse) const {
  if (!recurse) {
    return parameters_.values();
  }
  std::vector<Tensor> result;
  apply(
      [&result](const Module& module) { extend(result, module.parameters_); });
  return result;
  return named_parameters(recurse).values();
}

OrderedDict<std::string, Tensor> Module::named_parameters(bool recurse) const {
  if (!recurse) {
    return parameters_;
  }
  OrderedDict<std::string, Tensor> result;
  apply([&result](const std::string& name, const Module& module) {
    for (const auto& parameter : module.parameters_) {
      result.insert(join_name(name, parameter.key()), parameter.value());
  if (!recurse) {
    for (const auto& parameter : parameters_) {
      if (parameter.value().defined()) {
        result.insert(parameter.key(), parameter.value());
      }
    }
  });
  } else {
    apply([&result](const std::string& name, const Module& module) {
      for (const auto& parameter : module.named_parameters(/*recurse=*/false)) {
        TORCH_INTERNAL_ASSERT(parameter.value().defined());
        result.insert(join_name(name, parameter.key()), parameter.value());
      }
    });
  }
  return result;
}

std::vector<Tensor> Module::buffers(bool recurse) const {
  if (!recurse) {
    return buffers_.values();
  }
  std::vector<Tensor> result;
  apply([&result](const Module& module) { extend(result, module.buffers_); });
  return result;
  return named_buffers(recurse).values();
}

OrderedDict<std::string, Tensor> Module::named_buffers(bool recurse) const {
  if (!recurse) {
    return buffers_;
  }
  OrderedDict<std::string, Tensor> result;
  apply([&result](const std::string& name, const Module& module) {
    for (const auto& buffer : module.buffers_) {
      result.insert(join_name(name, buffer.key()), buffer.value());
  if (!recurse) {
    for (const auto& buffer : buffers_) {
      if (buffer.value().defined()) {
        result.insert(buffer.key(), buffer.value());
      }
    }
  });
  } else {
    apply([&result](const std::string& name, const Module& module) {
      for (const auto& buffer : module.named_buffers(/*recurse=*/false)) {
        TORCH_INTERNAL_ASSERT(buffer.value().defined());
        result.insert(join_name(name, buffer.key()), buffer.value());
      }
    });
  }
  return result;
}

@ -261,7 +254,7 @@ void Module::zero_grad() {
  for (auto& child : children_) {
    child.value()->zero_grad();
  }
  for (auto& parameter : parameters_) {
  for (auto& parameter : named_parameters(/*recurse=*/false)) {
    auto& grad = parameter->grad();
    if (grad.defined()) {
      grad = grad.detach();

@ -271,10 +264,10 @@ void Module::zero_grad() {
}

void Module::save(serialize::OutputArchive& archive) const {
  for (const auto& parameter : parameters_) {
  for (const auto& parameter : named_parameters(/*recurse=*/false)) {
    archive.write(parameter.key(), parameter.value());
  }
  for (const auto& buffer : buffers_) {
  for (const auto& buffer : named_buffers(/*recurse=*/false)) {
    archive.write(buffer.key(), buffer.value(), /*is_buffer=*/true);
  }
  for (const auto& child : children_) {

@ -287,10 +280,10 @@ void Module::save(serialize::OutputArchive& archive) const {
}

void Module::load(serialize::InputArchive& archive) {
  for (auto& parameter : parameters_) {
  for (auto& parameter : named_parameters(/*recurse=*/false)) {
    archive.read(parameter.key(), parameter.value());
  }
  for (auto& buffer : buffers_) {
  for (auto& buffer : named_buffers(/*recurse=*/false)) {
    archive.read(buffer.key(), buffer.value(), /*is_buffer=*/true);
  }
  for (const auto& child : children_) {
@ -19,8 +19,20 @@ namespace F = torch::nn::functional;
namespace torch {
namespace nn {
Conv1dImpl::Conv1dImpl(
    ConvOptions<1> options_)
    : ConvImpl(options_.transposed(false).output_padding(0)) {}
    Conv1dOptions options_)
    : ConvNdImpl(
        detail::ConvNdOptions<1>(
            /*in_channels=*/options_.in_channels(),
            /*out_channels=*/options_.out_channels(),
            /*kernel_size=*/options_.kernel_size())
            .stride(options_.stride())
            .padding(options_.padding())
            .dilation(options_.dilation())
            .transposed(false)
            .output_padding(0)
            .groups(options_.groups())
            .bias(options_.bias())
            .padding_mode(options_.padding_mode())) {}

Tensor Conv1dImpl::forward(const Tensor& input) {
  if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) {

@ -44,8 +56,20 @@ Tensor Conv1dImpl::forward(const Tensor& input) {
}

Conv2dImpl::Conv2dImpl(
    ConvOptions<2> options_)
    : ConvImpl(options_.transposed(false).output_padding(0)) {}
    Conv2dOptions options_)
    : ConvNdImpl(
        detail::ConvNdOptions<2>(
            /*in_channels=*/options_.in_channels(),
            /*out_channels=*/options_.out_channels(),
            /*kernel_size=*/options_.kernel_size())
            .stride(options_.stride())
            .padding(options_.padding())
            .dilation(options_.dilation())
            .transposed(false)
            .output_padding(0)
            .groups(options_.groups())
            .bias(options_.bias())
            .padding_mode(options_.padding_mode())) {}

Tensor Conv2dImpl::forward(const Tensor& input) {
  if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) {

@ -71,8 +95,20 @@ Tensor Conv2dImpl::forward(const Tensor& input) {
}

Conv3dImpl::Conv3dImpl(
    ConvOptions<3> options_)
    : ConvImpl(options_.transposed(false).output_padding(0)) {}
    Conv3dOptions options_)
    : ConvNdImpl(
        detail::ConvNdOptions<3>(
            /*in_channels=*/options_.in_channels(),
            /*out_channels=*/options_.out_channels(),
            /*kernel_size=*/options_.kernel_size())
            .stride(options_.stride())
            .padding(options_.padding())
            .dilation(options_.dilation())
            .transposed(false)
            .output_padding(0)
            .groups(options_.groups())
            .bias(options_.bias())
            .padding_mode(options_.padding_mode())) {}

Tensor Conv3dImpl::forward(const Tensor& input) {
  if (c10::get_if<enumtype::kCircular>(&options.padding_mode())) {

@ -98,14 +134,14 @@ Tensor Conv3dImpl::forward(const Tensor& input) {
      options.groups());
}

template class ConvImpl<1, Conv1dImpl>;
template class ConvImpl<2, Conv2dImpl>;
template class ConvImpl<3, Conv3dImpl>;
template class ConvNdImpl<1, Conv1dImpl>;
template class ConvNdImpl<2, Conv2dImpl>;
template class ConvNdImpl<3, Conv3dImpl>;

// ============================================================================

template <size_t D, typename Derived>
std::vector<int64_t> ConvTransposeImpl<D, Derived>::_output_padding(
std::vector<int64_t> ConvTransposeNdImpl<D, Derived>::_output_padding(
    const Tensor& input, const c10::optional<at::IntArrayRef>& output_size,
    const ExpandingArray<D>& stride, const ExpandingArray<D>& padding,
    const ExpandingArray<D>& kernel_size) {

@ -151,7 +187,20 @@ std::vector<int64_t> ConvTransposeImpl<D, Derived>::_output_padding(
}

ConvTranspose1dImpl::ConvTranspose1dImpl(
    ConvTransposeOptions<1> options_) : ConvTransposeImpl(options_.transposed(true)) {}
    ConvTranspose1dOptions options_)
    : ConvTransposeNdImpl(
        detail::ConvNdOptions<1>(
            /*in_channels=*/options_.in_channels(),
            /*out_channels=*/options_.out_channels(),
            /*kernel_size=*/options_.kernel_size())
            .stride(options_.stride())
            .padding(options_.padding())
            .dilation(options_.dilation())
            .transposed(true)
            .output_padding(options_.output_padding())
            .groups(options_.groups())
            .bias(options_.bias())
            .padding_mode(options_.padding_mode())) {}

Tensor ConvTranspose1dImpl::forward(
    const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) {

@ -168,7 +217,19 @@ Tensor ConvTranspose1dImpl::forward(
}

ConvTranspose2dImpl::ConvTranspose2dImpl(
    ConvTransposeOptions<2> options_) : ConvTransposeImpl(options_.transposed(true)) {}
    ConvTranspose2dOptions options_)
    : ConvTransposeNdImpl(detail::ConvNdOptions<2>(
          /*in_channels=*/options_.in_channels(),
          /*out_channels=*/options_.out_channels(),
          /*kernel_size=*/options_.kernel_size())
          .stride(options_.stride())
          .padding(options_.padding())
          .dilation(options_.dilation())
          .transposed(true)
          .output_padding(options_.output_padding())
          .groups(options_.groups())
          .bias(options_.bias())
          .padding_mode(options_.padding_mode())) {}

Tensor ConvTranspose2dImpl::forward(
    const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) {

@ -185,7 +246,19 @@ Tensor ConvTranspose2dImpl::forward(
}

ConvTranspose3dImpl::ConvTranspose3dImpl(
    ConvTransposeOptions<3> options_) : ConvTransposeImpl(options_.transposed(true)) {}
    ConvTranspose3dOptions options_)
    : ConvTransposeNdImpl(detail::ConvNdOptions<3>(
          /*in_channels=*/options_.in_channels(),
          /*out_channels=*/options_.out_channels(),
          /*kernel_size=*/options_.kernel_size())
          .stride(options_.stride())
          .padding(options_.padding())
          .dilation(options_.dilation())
          .transposed(true)
          .output_padding(options_.output_padding())
          .groups(options_.groups())
          .bias(options_.bias())
          .padding_mode(options_.padding_mode())) {}

Tensor ConvTranspose3dImpl::forward(
    const Tensor& input, const c10::optional<at::IntArrayRef>& output_size) {

@ -201,9 +274,9 @@ Tensor ConvTranspose3dImpl::forward(
      output_padding, options.groups(), options.dilation());
}

template class ConvTransposeImpl<1, ConvTranspose1dImpl>;
template class ConvTransposeImpl<2, ConvTranspose2dImpl>;
template class ConvTransposeImpl<3, ConvTranspose3dImpl>;
template class ConvTransposeNdImpl<1, ConvTranspose1dImpl>;
template class ConvTransposeNdImpl<2, ConvTranspose2dImpl>;
template class ConvTransposeNdImpl<3, ConvTranspose3dImpl>;

} // namespace nn
} // namespace torch
@ -36,7 +36,8 @@ PyObject* rpc_init(PyObject* /* unused */) {

  auto rpcBackendOptions =
      shared_ptr_class_<RpcBackendOptions>(module, "RpcBackendOptions")
          .def_readwrite("rpc_timeout", &RpcBackendOptions::rpcTimeout);
          .def_readwrite("rpc_timeout", &RpcBackendOptions::rpcTimeout)
          .def_readwrite("init_method", &RpcBackendOptions::initMethod);

  auto workerInfo =
      shared_ptr_class_<WorkerInfo>(

@ -111,9 +112,9 @@ Otherwise, throws an exception.
        return PyRRef::unpickle(t);
      }));

  // future.wait() should not be called after wait_all_workers(), e.g.,
  // pythonRpcHandler is cleaned up in wait_all_workers(); after
  // wait_all_workers(), python objects returned from rpc python calls cannot be
  // future.wait() should not be called after shutdown(), e.g.,
  // pythonRpcHandler is cleaned up in shutdown(); after
  // shutdown(), python objects returned from rpc python calls cannot be
  // resolved.
  auto futureMessage =
      shared_ptr_class_<FutureMessage>(module, "FutureMessage")

@ -154,6 +155,10 @@ Otherwise, throws an exception.
          "join",
          &ProcessGroupAgent::join,
          py::call_guard<py::gil_scoped_release>())
      .def(
          "shutdown",
          &ProcessGroupAgent::shutdown,
          py::call_guard<py::gil_scoped_release>())
      .def(
          "sync",
          &ProcessGroupAgent::sync,

@ -164,8 +169,8 @@ Otherwise, throws an exception.
    agent->start();
  });

  module.def("_destroy_rref_context", []() {
    RRefContext::getInstance().destroyInstance();
  module.def("_destroy_rref_context", [](bool ignoreRRefLeak) {
    RRefContext::getInstance().destroyInstance(ignoreRRefLeak);
  });

  module.def("_cleanup_python_rpc_handler", []() {
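From Python, the join/shutdown split registered above surfaces as rpc.shutdown(), which replaces the older wait_all_workers() teardown. A two-worker sketch (worker names and world size are illustrative):

    import torch
    import torch.distributed.rpc as rpc

    # on rank 0, with a matching init_rpc("worker1", ...) on rank 1
    rpc.init_rpc("worker0", rank=0, world_size=2)
    ret = rpc.rpc_sync("worker1", torch.add, args=(torch.ones(2), 1))
    rpc.shutdown()  # graceful: blocks until outstanding work completes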
@ -127,7 +127,6 @@ ProcessGroupAgent::ProcessGroupAgent(
|
||||
WorkerInfo(std::move(workerName), pg->getRank()),
|
||||
c10::guts::make_unique<RequestCallbackImpl>(),
|
||||
rpcTimeout),
|
||||
shutdown_{false},
|
||||
pg_(std::move(pg)),
|
||||
sendCounts_(pg_->getSize()),
|
||||
recvCounts_(pg_->getSize()),
|
||||
@ -180,30 +179,12 @@ const WorkerInfo& ProcessGroupAgent::getWorkerInfo(worker_id_t id) const {
|
||||
}
|
||||
|
||||
void ProcessGroupAgent::join() {
|
||||
// Every process i sends a SHUTDOWN message to process i + 1. This is
|
||||
// necessary for now because:
|
||||
// 1. There is no abort API for ProcessGroup::recvAnysource yet. We have to
|
||||
// feed it a message or kill the thread.
|
||||
// 2. A GLOO process cannot send message to itself. (there is an ongoing
|
||||
// effort to fix this problem).
|
||||
shutdown_.store(true);
|
||||
sync();
|
||||
// This is needed in case no futures were created, otherwise the future
|
||||
// timeout watchdog would sleep forever.
|
||||
|
||||
futureTimeoutCV_.notify_one();
|
||||
std::unique_lock<std::mutex> lock(futureMutex_);
|
||||
futureCV_.wait(
|
||||
lock, [this] { return futures_.empty() && futureTimeouts_.empty(); });
|
||||
lock.unlock();
|
||||
pg_->barrier()->wait();
|
||||
int dst = (pg_->getRank() + 1) % pg_->getSize();
|
||||
enqueueSend(
|
||||
SendWork(allWorkerInfo_[dst], Message({}, {}, MessageType::SHUTDOWN)));
|
||||
threadPool_.waitWorkComplete();
|
||||
listenerThread_.join();
|
||||
futureTimeoutThread_.join();
|
||||
PythonRpcHandler::getInstance().cleanup();
|
||||
}
|
||||
|
||||
bool ProcessGroupAgent::hasPendingMessage() {
|
||||
@ -269,14 +250,38 @@ void ProcessGroupAgent::sync() {
}

void ProcessGroupAgent::start() {
  {
    std::lock_guard<std::mutex> futureLock{futureMutex_};
    rpcRunning_.store(true);
  }
  listenerThread_ = std::thread(&ProcessGroupAgent::listenLoop, this);
  futureTimeoutThread_ =
      std::thread(&ProcessGroupAgent::pollTimedOutRPCs, this);
}

void ProcessGroupAgent::shutdown() {
  LOG(INFO) << "Shutting down ProcessGroupAgent.";
  std::unique_lock<std::mutex> lock{futureMutex_};
  if (!rpcRunning_.exchange(false)) {
    return;
  }
  lock.unlock();
  futureTimeoutCV_.notify_one();
  futureTimeoutThread_.join();
  {
    std::unique_lock<std::mutex> lock(recvWorkMutex_);
    if (recvWork_) {
      recvWork_->abort();
    }
  }
  threadPool_.waitWorkComplete();
  listenerThread_.join();
}

std::shared_ptr<FutureMessage> ProcessGroupAgent::send(
    const WorkerInfo& to,
    Message&& message) {
  TORCH_CHECK(rpcRunning_.load(), "ProcessGroupAgent hasn't started.")
  TORCH_CHECK(
      to.id_ < (worker_id_t)pg_->getSize(),
      "Destination rank is out of bound, got ",
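shutdown() guards against double invocation with an atomic exchange: whichever caller flips rpcRunning_ from true to false performs the teardown, and every later or concurrent call returns immediately. A standalone sketch of the idiom, under assumed names:

    #include <atomic>
    #include <iostream>

    class Service {
     public:
      void start() { running_.store(true); }

      void shutdown() {
        // exchange() returns the previous value: exactly one caller observes
        // `true` and performs teardown; repeated or concurrent calls no-op.
        if (!running_.exchange(false)) {
          return;
        }
        std::cout << "tearing down once\n";
        // ... join threads, abort pending work ...
      }

     private:
      std::atomic<bool> running_{false};
    };

    int main() {
      Service s;
      s.start();
      s.shutdown();
      s.shutdown();  // safe: the second call returns immediately
    }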
@ -456,10 +461,19 @@ void ProcessGroupAgent::enqueueRecv(RecvWork work) {
}

void ProcessGroupAgent::listenLoop() {
  while (true) {
  while (rpcRunning_.load()) {
    // rank, tensor size, message type
    std::vector<torch::Tensor> preamble = {torch::empty({3}, {torch::kInt64})};
    pg_->recvAnysource(preamble, pg_->getRank())->wait();
    auto work = pg_->recvAnysource(preamble, pg_->getRank());
    {
      std::lock_guard<std::mutex> guard(recvWorkMutex_);
      recvWork_ = work;
    }

    if (!rpcRunning_.load() || !work->wait() /* aborted */) {
      return;
    }

    int64_t* preamble_items = preamble.front().storage().data<int64_t>();

    auto srcRank = preamble_items[0];
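The loop now publishes the pending recv under recvWorkMutex_ before blocking on it, so shutdown() can find and abort the exact wait instead of feeding the loop a dummy SHUTDOWN message. A generic publish-then-wait sketch; the Work type is a stand-in for c10d::ProcessGroup::Work:

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <memory>
    #include <mutex>
    #include <thread>

    // Stand-in for an abortable, waitable unit of work.
    class Work {
     public:
      bool wait() {  // returns false if aborted
        std::unique_lock<std::mutex> lock(m_);
        cv_.wait(lock, [this] { return done_ || aborted_; });
        return !aborted_;
      }
      void abort() {
        { std::lock_guard<std::mutex> lock(m_); aborted_ = true; }
        cv_.notify_all();
      }
     private:
      std::mutex m_;
      std::condition_variable cv_;
      bool done_ = false;
      bool aborted_ = false;
    };

    std::atomic<bool> running{true};
    std::mutex pendingMutex;
    std::shared_ptr<Work> pending;

    void listenLoop() {
      while (running.load()) {
        auto work = std::make_shared<Work>();  // "post" the next recv
        {
          // Publish before blocking so shutdown() can abort this wait.
          std::lock_guard<std::mutex> guard(pendingMutex);
          pending = work;
        }
        if (!running.load() || !work->wait()) {
          return;  // shut down or aborted mid-wait
        }
        // ... process the received message ...
      }
    }

    int main() {
      std::thread listener(listenLoop);
      std::this_thread::sleep_for(std::chrono::milliseconds(50));
      running.store(false);
      {
        std::lock_guard<std::mutex> guard(pendingMutex);
        if (pending) pending->abort();  // unblocks the listener's wait()
      }
      listener.join();
    }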
@ -483,9 +497,12 @@ void ProcessGroupAgent::listenLoop() {
}

void ProcessGroupAgent::pollTimedOutRPCs() {
  while (!shutdown_.load()) {
    std::chrono::milliseconds sleepTime;
  while (true) {
    std::unique_lock<std::mutex> lock{futureMutex_};
    if (!rpcRunning_.load()) {
      return;
    }
    std::chrono::milliseconds sleepTime;
    // Estimate amount of time the first future will time out in, and sleep
    // for that long.
    // if there are no futures or the first future's RPC timeout is set to 0

@ -505,7 +522,7 @@ void ProcessGroupAgent::pollTimedOutRPCs() {
      futureTimeoutCV_.wait_for(lock, sleepTime);
    }

    if (shutdown_.load()) {
    if (!rpcRunning_.load()) {
      return;
    }
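Checking rpcRunning_ under futureMutex_, while shutdown() flips it under the same mutex before notifying, closes the classic lost-wakeup race: the watchdog cannot test the flag, get preempted, miss the notify, and then sleep for a full timeout. A compact sketch of the pattern, with illustrative names:

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    std::mutex m;
    std::condition_variable cv;
    std::atomic<bool> running{true};

    void watchdog() {
      while (true) {
        std::unique_lock<std::mutex> lock(m);
        if (!running.load()) {
          return;
        }
        // Sleep until the next deadline or until shutdown notifies us. The
        // flag was read under `m`, and shutdown() flips it under `m`, so the
        // notify cannot slip in between the check and the wait.
        cv.wait_for(lock, std::chrono::milliseconds(100));
        // ... expire timed-out entries here ...
      }
    }

    void shutdown() {
      {
        std::lock_guard<std::mutex> lock(m);
        running.store(false);
      }
      cv.notify_one();
    }

    int main() {
      std::thread t(watchdog);
      std::this_thread::sleep_for(std::chrono::milliseconds(250));
      shutdown();
      t.join();
    }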
@ -14,7 +14,7 @@ namespace distributed {
namespace rpc {

struct ProcessGroupRpcBackendOptions : public RpcBackendOptions {
  ProcessGroupRpcBackendOptions() noexcept = default;
  ProcessGroupRpcBackendOptions() = default;
  int numSendRecvThreads;
};

@ -57,6 +57,8 @@ class ProcessGroupAgent : public RpcAgent {

  void start() override;

  void shutdown() override;

 protected:
  // This method wraps the destination information and the message into a
  // SendWork object, and puts the SendWork into a queue. Another thread will

@ -143,10 +145,6 @@ class ProcessGroupAgent : public RpcAgent {
    return ++nextId_;
  }

  // atomic bool indicating if join() has been called and background threads
  // should shut down.
  std::atomic_bool shutdown_;

  std::shared_ptr<c10d::ProcessGroup> pg_;
  // worker name -> rank
  std::unordered_map<std::string, int> nameMap_;

@ -159,12 +157,23 @@ class ProcessGroupAgent : public RpcAgent {
  MessageCounter recvCounts_;

  std::atomic<int64_t> nextId_;
  // atomic bool indicating if this agent is running. It is set in
  // ProcessGroupAgent::start and unset in ProcessGroupAgent::shutdown and
  // ProcessGroupAgent::join. It controls whether several background threads
  // should be running.
  // We lock access to this in shutdown() and pollTimedOutRPCs() to prevent
  // race conditions when notifying condition variables.
  std::atomic<bool> rpcRunning_{false};
  // one mutex per ProcessGroup rank, as ProcessGroup::send is not thread-safe
  // when using the same tag.
  std::vector<std::mutex> sendMutexes_;
  std::thread listenerThread_;
  // A thread to poll existing futures and check for timed out ones.
  std::thread futureTimeoutThread_;
  // Lock and shared ptr to currently pending work, set in listenLoop() and
  // interruptible in shutdown().
  std::mutex recvWorkMutex_;
  std::shared_ptr<c10d::ProcessGroup::Work> recvWork_;
  // A threadPool that processes both SendWork and RecvWork. There are two
  // motivations for adding a ThreadPool:
  // (1) RPC serialization/deserialization and processing can be expensive,
@ -13,8 +13,9 @@ namespace distributed {
namespace rpc {

struct RpcBackendOptions {
  RpcBackendOptions() noexcept = default;
  RpcBackendOptions() = default;
  std::chrono::milliseconds rpcTimeout;
  std::string initMethod;
};

// A globally unique ID to identify an RpcAgent
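One plausible reason for dropping the `noexcept` on these defaulted constructors (the diff itself does not record the motivation) is the C++14/17 rule that an explicitly defaulted function with an exception specification incompatible with the implicit one is defined as deleted. A minimal illustration of that rule, with hypothetical types:

    #include <string>

    struct MayThrow {
      MayThrow() {}  // user-provided and not noexcept: potentially throwing
      std::string s;
    };

    struct Options {
      // Under C++14/17, uncommenting the next line defines the constructor
      // as deleted, because the explicit `noexcept` contradicts the implicit,
      // potentially-throwing specification derived from MayThrow. (C++20
      // relaxed this and lets the explicit specification win.)
      // Options() noexcept = default;
      Options() = default;  // fine: the exception specification is deduced
      MayThrow field;
    };

    int main() {
      Options o;
      (void)o;
    }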
@ -124,7 +125,11 @@ class TORCH_API RpcAgent {
  virtual void sync() = 0;

  // start accepting requests
  virtual void start() {}
  virtual void start() = 0;

  // Stop accepting requests and shut down the RPC framework as soon as
  // possible by terminating all RPC threads.
  virtual void shutdown() = 0;

  // Set the default rpc agent.
  static void setDefaultRpcAgent(std::shared_ptr<RpcAgent> defaultRpcAgent);
@ -136,15 +136,16 @@ UserRRef<T>::UserRRef(

template <typename T>
UserRRef<T>::~UserRRef() {
  // TODO: queue this in RRefContext instead of doing it here.
  auto& ctx = RRefContext::getInstance();
  if (ctx.getWorkerId() != ownerId_) {
    auto fm = ctx.agent()->send(
        ctx.agent()->getWorkerInfo(ownerId_),
        RRefUserDelete(rrefId_, forkId_).toMessage());

    fm->addCallback(
        [](const Message& message) { RRefContext::handleException(message); });
  try {
    RRefContext::getInstance().delUser(ownerId_, rrefId_, forkId_);
  } catch (const std::exception& ex) {
    LOG(ERROR) << "Error occurred when deleting UserRRef instance, "
               << "RRefId = " << rrefId_ << ", ForkId = " << forkId_ << " : "
               << ex.what();
  } catch (...) {
    LOG(ERROR) << "Error occurred when deleting UserRRef instance, "
               << "RRefId = " << rrefId_ << ", ForkId = " << forkId_ << " : "
               << "unknown error";
  }
}
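Destructors are implicitly noexcept in C++11 and later, so an exception escaping ~UserRRef() would call std::terminate; the new code therefore catches everything and only logs. A minimal sketch of the idiom, with a hypothetical handle type:

    #include <exception>
    #include <iostream>
    #include <stdexcept>

    struct RemoteHandle {
      ~RemoteHandle() {
        // Destructors are noexcept(true) by default; a throw escaping here
        // terminates the process. Swallow and report instead.
        try {
          releaseRemoteResource();
        } catch (const std::exception& ex) {
          std::cerr << "release failed: " << ex.what() << '\n';
        } catch (...) {
          std::cerr << "release failed: unknown error\n";
        }
      }

      void releaseRemoteResource() {
        // Stand-in for a network send that can fail.
        throw std::runtime_error("peer unreachable");
      }
    };

    int main() {
      RemoteHandle h;  // destructor logs the failure instead of terminating
    }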
@ -13,8 +13,13 @@ RRefContext& RRefContext::getInstance() {
  return *context;
}

void RRefContext::destroyInstance() {
  RRefContext::getInstance().checkRRefLeaks();
void RRefContext::destroyInstance(bool ignoreRRefLeak) {
  auto& ctx = RRefContext::getInstance();
  {
    std::lock_guard<std::mutex> lock(ctx.destroyedMutex_);
    ctx.destroyed_ = true;
  }
  ctx.checkRRefLeaks(ignoreRRefLeak);
}

void RRefContext::handleException(const Message& message) {

@ -27,7 +32,7 @@ void RRefContext::handleException(const Message& message) {
}

RRefContext::RRefContext(std::shared_ptr<RpcAgent> agent)
    : agent_(std::move(agent)) {}
    : agent_(std::move(agent)), destroyed_(false) {}

RRefContext::~RRefContext() {
  if (!owners_.empty()) {

@ -36,7 +41,7 @@ RRefContext::~RRefContext() {
  }
}

void RRefContext::checkRRefLeaks() {
void RRefContext::checkRRefLeaks(bool ignoreRRefLeak) {
  if (!forks_.empty()) {
    std::stringstream ss;
    for (auto& entry : forks_) {

@ -46,7 +51,21 @@ void RRefContext::checkRRefLeaks() {
           << std::endl;
      }
    }
    AT_ERROR(ss.str());

    if (ignoreRRefLeak) {
      LOG(WARNING)
          << "Detected RRef Leaks during shutdown. This usually "
          << "occurs when the application code still holds references to RRef "
          << "instances when calling shutdown(). If the program has "
          << "completed correctly and the process is exiting, it is OK to "
          << "ignore these leaks. However, if your program will keep running "
          << "after this, these leaks could result in memory leaks on RRef "
          << "owners. Please make sure all RRefs are out of scope and Python "
          << "GC has deleted them before calling shutdown(): \n"
          << ss.str();
    } else {
      AT_ERROR(ss.str());
    }
  }
}

@ -96,6 +115,21 @@ template std::shared_ptr<UserRRef<py::object>> RRefContext::createUserRRef<
    const RRefId& rrefId,
    const ForkId& forkId);

void RRefContext::delUser(
    const worker_id_t owner,
    const RRefId& rrefId,
    const ForkId& forkId) {
  std::lock_guard<std::mutex> lock(destroyedMutex_);
  if (!destroyed_) {
    auto fm = agent_->send(
        agent_->getWorkerInfo(owner),
        RRefUserDelete(rrefId, forkId).toMessage());

    fm->addCallback(
        [](const Message& message) { RRefContext::handleException(message); });
  }
}

template <typename T>
std::shared_ptr<RRef> RRefContext::getOrCreateRRef(const RRefForkData& rfd) {
  auto& ownerId = rfd.ownerId_;
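delUser() consults the `destroyed_` tombstone under destroyedMutex_ so that RRef destructors running during or after shutdown stop sending delete messages through a context that is being torn down. A reduced sketch of the guarded-flag pattern, with illustrative names:

    #include <iostream>
    #include <mutex>

    class Context {
     public:
      // Called once at shutdown: after this, no more messages leave.
      void markDestroyed() {
        std::lock_guard<std::mutex> lock(mu_);
        destroyed_ = true;
      }

      // Called from object destructors that may outlive shutdown. The flag
      // is read under the same mutex, so a send can never interleave with
      // the transition to destroyed.
      void release(int id) {
        std::lock_guard<std::mutex> lock(mu_);
        if (!destroyed_) {
          std::cout << "sending delete for " << id << '\n';
        }  // else: shutting down; dropping the message is intentional
      }

     private:
      std::mutex mu_;
      bool destroyed_ = false;
    };

    int main() {
      Context ctx;
      ctx.release(1);      // sent
      ctx.markDestroyed();
      ctx.release(2);      // silently dropped
    }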
@ -16,7 +16,7 @@ namespace rpc {
class RRefContext {
 public:
  static RRefContext& getInstance();
  static void destroyInstance();
  static void destroyInstance(bool ignoreRRefLeak = true);

  static void handleException(const Message& message);

@ -111,6 +111,11 @@ class RRefContext {
  void addPendingUser(const ForkId& forkId, const std::shared_ptr<RRef>& rref);
  void delPendingUser(const ForkId& forkId);

  void delUser(
      const worker_id_t owner,
      const RRefId& rrefId,
      const ForkId& forkId);

 private:
  RRefContext(std::shared_ptr<RpcAgent>);

@ -123,7 +128,7 @@ class RRefContext {
  void finishForkRequest(const ForkId& forkId, worker_id_t parent);

  // If there is any leak on any RRef, this method will throw an error.
  void checkRRefLeaks();
  void checkRRefLeaks(bool ignoreRRefLeak);

  static std::atomic<local_id_t> nextLocalId_;

@ -157,6 +162,9 @@ class RRefContext {
  // owner learns about the forked child.
  std::unordered_map<ForkId, std::shared_ptr<RRef>, ForkId::Hash>
      pendingChildren_;

  std::mutex destroyedMutex_;
  bool destroyed_;
};

} // namespace rpc
@ -751,5 +751,33 @@ std::tuple<std::string, RawDataExportMap> export_onnx(
      graph_encoder.get_raw_data_export_map());
}

namespace {
void export_opnames(const script::Module& m, std::set<std::string>& opnames) {
  for (const auto& method : m.get_methods()) {
    const auto& func = method.function();
    for (const auto& node : func.graph()->nodes()) {
      auto op = findOperatorFor(node);
      if (op) {
        auto opname = node->schema().operator_name();
        std::string namestr = opname.name;
        if (!opname.overload_name.empty()) {
          namestr += "." + opname.overload_name;
        }
        opnames.emplace(namestr);
      }
    }
  }
  for (const auto& sub_m : m.children()) {
    export_opnames(sub_m, opnames);
  }
}
} // namespace

std::vector<std::string> export_opnames(const script::Module& m) {
  std::set<std::string> names;
  export_opnames(m, names);
  return std::vector<std::string>(names.begin(), names.end());
}

} // namespace jit
} // namespace torch
@ -65,5 +65,8 @@ using ExportModuleExtraFilesHook =
    std::function<script::ExtraFilesMap(const script::Module&)>;
TORCH_API void SetExportModuleExtraFilesHook(ExportModuleExtraFilesHook hook);

// Returns a list of names of all operators in the module and its submodules.
TORCH_API std::vector<std::string> export_opnames(const script::Module& m);

} // namespace jit
} // namespace torch
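A plausible usage sketch for the new export_opnames API, assuming a serialized TorchScript module at model.pt and a libtorch build of the matching version; the path and printing loop are illustrative:

    #include <torch/script.h>

    #include <iostream>
    #include <string>

    int main() {
      // Load a TorchScript module and list every operator it (and its
      // submodules) reference, e.g. "aten::add.Tensor". Useful when building
      // a trimmed runtime that only registers the ops a model needs.
      torch::jit::script::Module module = torch::jit::load("model.pt");
      for (const std::string& name : torch::jit::export_opnames(module)) {
        std::cout << name << '\n';
      }
      return 0;
    }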
Some files were not shown because too many files have changed in this diff.