mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-22 22:25:10 +08:00
Compare commits
110 Commits
cslpull76
...
v1.5.0-rc3
Author | SHA1 | Date | |
---|---|---|---|
b58f89b2e4 | |||
87b6685c6b | |||
f746f1b746 | |||
1379415150 | |||
7d638d2596 | |||
bad005d331 | |||
16d8a52407 | |||
a33b264588 | |||
3a67e00889 | |||
6bd039551d | |||
b6c3058d61 | |||
ed908b4fbc | |||
b66e0af58b | |||
bf8a5ede96 | |||
c2bc5c56c5 | |||
db3c3ed662 | |||
9de4770bbd | |||
911a2a6b63 | |||
60375bcfdf | |||
63dcd9eccc | |||
e8236d2ed4 | |||
0058b1bb7e | |||
419283e291 | |||
0e6f6ba218 | |||
ec8dbaf920 | |||
7e168d134f | |||
6daae58871 | |||
fee0ff1bf6 | |||
deaf3b65cf | |||
dca9c2501d | |||
842cd47416 | |||
a30b49085c | |||
82626f8ad9 | |||
27fddfda4f | |||
7ecf6a1c10 | |||
beb07a44c4 | |||
a01c3bd1fe | |||
ffd010f8a0 | |||
8ad59f03a8 | |||
ed3640df68 | |||
fb88942f6c | |||
5d05c51887 | |||
df5986fbf3 | |||
165403f614 | |||
fbf18c34ff | |||
84f806c821 | |||
94139a7d95 | |||
75e36186b2 | |||
f4a0b406dd | |||
e884e720f0 | |||
dacdbc22d1 | |||
2a789cd0e0 | |||
f9b010f399 | |||
55614ff306 | |||
b12579da53 | |||
920e3eb761 | |||
bec01e755a | |||
6a880e1bc9 | |||
fa86e32a4e | |||
5aabaf2b18 | |||
4a707e8f95 | |||
db127b21eb | |||
45313cd9e1 | |||
df531973e1 | |||
9e3c577caa | |||
5357b8e4d9 | |||
0f23d23db4 | |||
7c24280a3f | |||
7100f0be13 | |||
f7f611c2ec | |||
acb982d0b0 | |||
aa8b7ad989 | |||
2d403ed8be | |||
c25a664f77 | |||
ab660ae394 | |||
3c476a8858 | |||
651fa88645 | |||
565c3400b4 | |||
3e332778b4 | |||
f598738920 | |||
4c6bfa0187 | |||
6f25003682 | |||
752c129fa1 | |||
fb59a9caca | |||
4d30dbdd35 | |||
b7f4a1a397 | |||
afda1dc943 | |||
d506ae882b | |||
36e5abe531 | |||
6e6f62230e | |||
5d15577e6c | |||
6aa5298c5c | |||
f3df13725b | |||
4eee3caa11 | |||
4d96463130 | |||
246b824644 | |||
5ca9014318 | |||
48590d6a9b | |||
75148df1f5 | |||
b89eb7c654 | |||
8877885454 | |||
e2184ba083 | |||
8ef47ad2f0 | |||
6725b6f503 | |||
bcd3f6da1a | |||
0b3d2f7b7d | |||
f522651a7e | |||
01c8ef2757 | |||
7cfe68ce3a | |||
6f3120c6b9 |
@ -466,7 +466,7 @@ But if you want to try, then I’d recommend
|
||||
# Always install miniconda 3, even if building for Python <3
|
||||
new_conda="~/my_new_conda"
|
||||
conda_sh="$new_conda/install_miniconda.sh"
|
||||
curl -o "$conda_sh" https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||
curl -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||
chmod +x "$conda_sh"
|
||||
"$conda_sh" -b -p "$MINICONDA_ROOT"
|
||||
rm -f "$conda_sh"
|
||||
|
@ -34,8 +34,6 @@ def get_processor_arch_name(cuda_version):
|
||||
|
||||
LINUX_PACKAGE_VARIANTS = OrderedDict(
|
||||
manywheel=[
|
||||
"2.7m",
|
||||
"2.7mu",
|
||||
"3.5m",
|
||||
"3.6m",
|
||||
"3.7m",
|
||||
@ -43,7 +41,7 @@ LINUX_PACKAGE_VARIANTS = OrderedDict(
|
||||
],
|
||||
conda=dimensions.STANDARD_PYTHON_VERSIONS,
|
||||
libtorch=[
|
||||
"2.7m",
|
||||
"3.7m",
|
||||
],
|
||||
)
|
||||
|
||||
@ -53,7 +51,7 @@ CONFIG_TREE_DATA = OrderedDict(
|
||||
wheel=dimensions.STANDARD_PYTHON_VERSIONS,
|
||||
conda=dimensions.STANDARD_PYTHON_VERSIONS,
|
||||
libtorch=[
|
||||
"2.7",
|
||||
"3.7",
|
||||
],
|
||||
)),
|
||||
)
|
||||
|
@ -67,9 +67,14 @@ class Conf(object):
|
||||
job_def["requires"].append("update_s3_htmls_for_nightlies_devtoolset7")
|
||||
job_def["filters"] = {"branches": {"only": "postnightly"}}
|
||||
else:
|
||||
filter_branches = ["nightly"]
|
||||
# we only want to add the release branch filter if we aren't
|
||||
# uploading
|
||||
if phase not in ["upload"]:
|
||||
filter_branches.append(r"/release\/.*/")
|
||||
job_def["filters"] = {
|
||||
"branches": {
|
||||
"only": "nightly"
|
||||
"only": filter_branches
|
||||
},
|
||||
# Will run on tags like v1.5.0-rc1, etc.
|
||||
"tags": {
|
||||
|
@ -4,7 +4,6 @@ from cimodel.lib.conf_tree import Ver
|
||||
|
||||
CONFIG_TREE_DATA = [
|
||||
(Ver("ubuntu", "16.04"), [
|
||||
([Ver("gcc", "5")], [XImportant("onnx_py2")]),
|
||||
([Ver("clang", "7")], [XImportant("onnx_main_py3.6"),
|
||||
XImportant("onnx_ort1_py3.6"),
|
||||
XImportant("onnx_ort2_py3.6")]),
|
||||
|
@ -33,8 +33,7 @@ class Conf:
|
||||
# TODO: Eventually we can probably just remove the cudnn7 everywhere.
|
||||
def get_cudnn_insertion(self):
|
||||
|
||||
omit = self.language == "onnx_py2" \
|
||||
or self.language == "onnx_main_py3.6" \
|
||||
omit = self.language == "onnx_main_py3.6" \
|
||||
or self.language == "onnx_ort1_py3.6" \
|
||||
or self.language == "onnx_ort2_py3.6" \
|
||||
or set(self.compiler_names).intersection({"android", "mkl", "clang"}) \
|
||||
@ -71,11 +70,10 @@ class Conf:
|
||||
def gen_docker_image(self):
|
||||
|
||||
lang_substitutions = {
|
||||
"onnx_py2": "py2",
|
||||
"onnx_main_py3.6": "py3.6",
|
||||
"onnx_ort1_py3.6": "py3.6",
|
||||
"onnx_ort2_py3.6": "py3.6",
|
||||
"cmake": "py2",
|
||||
"cmake": "py3",
|
||||
}
|
||||
|
||||
lang = miniutils.override(self.language, lang_substitutions)
|
||||
@ -85,7 +83,7 @@ class Conf:
|
||||
def gen_workflow_params(self, phase):
|
||||
parameters = OrderedDict()
|
||||
lang_substitutions = {
|
||||
"onnx_py2": "onnx-py2",
|
||||
"onnx_py3": "onnx-py3",
|
||||
"onnx_main_py3.6": "onnx-main-py3.6",
|
||||
"onnx_ort1_py3.6": "onnx-ort1-py3.6",
|
||||
"onnx_ort2_py3.6": "onnx-ort2-py3.6",
|
||||
@ -129,7 +127,7 @@ class Conf:
|
||||
job_name = "caffe2_" + self.get_platform() + "_build"
|
||||
|
||||
if not self.is_important:
|
||||
job_def["filters"] = {"branches": {"only": ["master", r"/ci-all\/.*/"]}}
|
||||
job_def["filters"] = {"branches": {"only": ["master", r"/ci-all\/.*/", r"/release\/.*/"]}}
|
||||
job_def.update(self.gen_workflow_params(phase))
|
||||
return {job_name : job_def}
|
||||
|
||||
|
@ -8,7 +8,6 @@ CUDA_VERSIONS = [
|
||||
]
|
||||
|
||||
STANDARD_PYTHON_VERSIONS = [
|
||||
"2.7",
|
||||
"3.5",
|
||||
"3.6",
|
||||
"3.7",
|
||||
|
@ -114,7 +114,7 @@ class Conf:
|
||||
if not self.is_important:
|
||||
# If you update this, update
|
||||
# caffe2_build_definitions.py too
|
||||
job_def["filters"] = {"branches": {"only": ["master", r"/ci-all\/.*/"]}}
|
||||
job_def["filters"] = {"branches": {"only": ["master", r"/ci-all\/.*/", r"/release\/.*/"]}}
|
||||
job_def.update(self.gen_workflow_params(phase))
|
||||
|
||||
return {job_name : job_def}
|
||||
|
2225
.circleci/config.yml
2225
.circleci/config.yml
File diff suppressed because it is too large
Load Diff
@ -4,7 +4,7 @@ set -ex
|
||||
|
||||
# Optionally install conda
|
||||
if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
BASE_URL="https://repo.continuum.io/miniconda"
|
||||
BASE_URL="https://repo.anaconda.com/miniconda"
|
||||
|
||||
MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
|
||||
|
||||
|
@ -31,9 +31,9 @@ fi
|
||||
|
||||
conda_sh="$workdir/install_miniconda.sh"
|
||||
if [[ "$(uname)" == Darwin ]]; then
|
||||
curl --retry 3 -o "$conda_sh" https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||
curl --retry 3 -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
|
||||
else
|
||||
curl --retry 3 -o "$conda_sh" https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
|
||||
curl --retry 3 -o "$conda_sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
|
||||
fi
|
||||
chmod +x "$conda_sh"
|
||||
"$conda_sh" -b -p "$MINICONDA_ROOT"
|
||||
|
@ -2,6 +2,19 @@
|
||||
set -eux -o pipefail
|
||||
export TZ=UTC
|
||||
|
||||
tagged_version() {
|
||||
# Grabs version from either the env variable CIRCLE_TAG
|
||||
# or the pytorch git described version
|
||||
GIT_DESCRIBE="git --git-dir ${workdir}/pytorch/.git describe"
|
||||
if [[ -n "${CIRCLE_TAG:-}" ]]; then
|
||||
echo "${CIRCLE_TAG}"
|
||||
elif ${GIT_DESCRIBE} --exact --tags >/dev/null; then
|
||||
${GIT_DESCRIBE} --tags
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# We need to write an envfile to persist these variables to following
|
||||
# steps, but the location of the envfile depends on the circleci executor
|
||||
if [[ "$(uname)" == Darwin ]]; then
|
||||
@ -47,15 +60,17 @@ export DATE="$(date -u +%Y%m%d)"
|
||||
#TODO: We should be pulling semver version from the base version.txt
|
||||
BASE_BUILD_VERSION="1.5.0.dev$DATE"
|
||||
# Change BASE_BUILD_VERSION to git tag when on a git tag
|
||||
if git describe --tags --exact >/dev/null 2>/dev/null; then
|
||||
# Use 'git -C' to make doubly sure we're in the correct directory for checking
|
||||
# the git tag
|
||||
if tagged_version >/dev/null; then
|
||||
# Switch upload folder to 'test/' if we are on a tag
|
||||
PIP_UPLOAD_FOLDER='test/'
|
||||
# Grab git tag, remove prefixed v and remove everything after -
|
||||
# Used to clean up tags that are for release candidates like v1.5.0-rc1
|
||||
# Turns tag v1.5.0-rc1 -> v1.5.0
|
||||
BASE_BUILD_VERSION="$(git describe --tags | sed -e 's/^v//' -e 's/-.*$//')"
|
||||
BASE_BUILD_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')"
|
||||
fi
|
||||
if [[ "$(uname)" == 'Darwin' ]] || [[ "$DESIRED_CUDA" == "cu101" ]] || [[ "$PACKAGE_TYPE" == conda ]]; then
|
||||
if [[ "$(uname)" == 'Darwin' ]] || [[ "$DESIRED_CUDA" == "cu102" ]] || [[ "$PACKAGE_TYPE" == conda ]]; then
|
||||
export PYTORCH_BUILD_VERSION="${BASE_BUILD_VERSION}"
|
||||
else
|
||||
export PYTORCH_BUILD_VERSION="${BASE_BUILD_VERSION}+$DESIRED_CUDA"
|
||||
|
@ -72,10 +72,10 @@ time python tools/setup_helpers/generate_code.py \
|
||||
|
||||
# Build the docs
|
||||
pushd docs/cpp
|
||||
pip install breathe>=4.13.0 bs4 lxml six
|
||||
pip install breathe==4.13.0 bs4 lxml six
|
||||
pip install --no-cache-dir -e "git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme"
|
||||
pip install exhale>=0.2.1
|
||||
pip install sphinx>=2.0
|
||||
pip install sphinx==2.4.4
|
||||
# Uncomment once it is fixed
|
||||
# pip install -r requirements.txt
|
||||
time make VERBOSE=1 html -j
|
||||
|
@ -151,7 +151,7 @@
|
||||
# Install Anaconda if we need to
|
||||
if [ -n "${CAFFE2_USE_ANACONDA}" ]; then
|
||||
rm -rf ${TMPDIR}/anaconda
|
||||
curl --retry 3 -o ${TMPDIR}/conda.sh https://repo.continuum.io/miniconda/Miniconda${ANACONDA_VERSION}-latest-MacOSX-x86_64.sh
|
||||
curl --retry 3 -o ${TMPDIR}/conda.sh https://repo.anaconda.com/miniconda/Miniconda${ANACONDA_VERSION}-latest-MacOSX-x86_64.sh
|
||||
chmod +x ${TMPDIR}/conda.sh
|
||||
/bin/bash ${TMPDIR}/conda.sh -b -p ${TMPDIR}/anaconda
|
||||
rm -f ${TMPDIR}/conda.sh
|
||||
|
@ -20,16 +20,16 @@ jobs:
|
||||
export id=$(docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
|
||||
|
||||
# TODO We may want to move the rebase logic to a separate step after checkout
|
||||
# Rebase to master only if in xenial_py3_6_gcc5_4 case
|
||||
if [[ "${CIRCLE_BRANCH}" != "master" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
|
||||
echo "Merge master branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
|
||||
# Rebase to release/1.5 only if in xenial_py3_6_gcc5_4 case
|
||||
if [[ "${CIRCLE_BRANCH}" != "release/1.5" && "${BUILD_ENVIRONMENT}" == *"gcc5"* ]]; then
|
||||
echo "Merge release/1.5 branch into $CIRCLE_BRANCH before build in environment $BUILD_ENVIRONMENT"
|
||||
set -x
|
||||
git config --global user.email "circleci.ossci@gmail.com"
|
||||
git config --global user.name "CircleCI"
|
||||
git config remote.origin.url https://github.com/pytorch/pytorch.git
|
||||
git config --add remote.origin.fetch +refs/heads/master:refs/remotes/origin/master
|
||||
git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/master:refs/remotes/origin/master --depth=100 --quiet
|
||||
export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/master`
|
||||
git config --add remote.origin.fetch +refs/heads/release/1.5:refs/remotes/origin/release/1.5
|
||||
git fetch --tags --progress https://github.com/pytorch/pytorch.git +refs/heads/release/1.5:refs/remotes/origin/release/1.5 --depth=100 --quiet
|
||||
export GIT_MERGE_TARGET=`git log -n 1 --pretty=format:"%H" origin/release/1.5`
|
||||
echo "GIT_MERGE_TARGET: " ${GIT_MERGE_TARGET}
|
||||
export GIT_COMMIT=${CIRCLE_SHA1}
|
||||
echo "GIT_COMMIT: " ${GIT_COMMIT}
|
||||
@ -38,7 +38,7 @@ jobs:
|
||||
git merge --allow-unrelated-histories --no-edit --no-ff ${GIT_MERGE_TARGET}
|
||||
set +x
|
||||
else
|
||||
echo "Do NOT merge master branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
|
||||
echo "Do NOT merge release/1.5 branch into $CIRCLE_BRANCH in environment $BUILD_ENVIRONMENT"
|
||||
fi
|
||||
|
||||
git submodule sync && git submodule update -q --init --recursive
|
||||
|
@ -15,6 +15,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- pytorch_windows_test:
|
||||
name: pytorch_windows_vs2017_14.11_py36_cuda10.1_test1
|
||||
test_name: pytorch-windows-test1
|
||||
@ -32,6 +33,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- pytorch_windows_test:
|
||||
name: pytorch_windows_vs2017_14.11_py36_cuda10.1_test2
|
||||
test_name: pytorch-windows-test2
|
||||
@ -49,6 +51,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- pytorch_windows_build:
|
||||
name: pytorch_windows_vs2017_14.16_py36_cuda10.1_build
|
||||
cuda_version: "10"
|
||||
@ -64,6 +67,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- pytorch_windows_test:
|
||||
name: pytorch_windows_vs2017_14.16_py36_cuda10.1_test1
|
||||
test_name: pytorch-windows-test1
|
||||
@ -81,6 +85,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- pytorch_windows_test:
|
||||
name: pytorch_windows_vs2017_14.16_py36_cuda10.1_test2
|
||||
test_name: pytorch-windows-test2
|
||||
@ -98,6 +103,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- pytorch_windows_build:
|
||||
name: pytorch_windows_vs2019_py36_cuda10.1_build
|
||||
cuda_version: "10"
|
||||
|
@ -7,12 +7,6 @@
|
||||
# pytorch-ci-hud to adjust the list of whitelisted builds
|
||||
# at https://github.com/ezyang/pytorch-ci-hud/blob/master/src/BuildHistoryDisplay.js
|
||||
|
||||
- binary_linux_build:
|
||||
name: binary_linux_manywheel_2_7mu_cpu_devtoolset7_build
|
||||
build_environment: "manywheel 2.7mu cpu devtoolset7"
|
||||
requires:
|
||||
- setup
|
||||
docker_image: "pytorch/manylinux-cuda102"
|
||||
- binary_linux_build:
|
||||
name: binary_linux_manywheel_3_7m_cu102_devtoolset7_build
|
||||
build_environment: "manywheel 3.7m cu102 devtoolset7"
|
||||
@ -23,24 +17,21 @@
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- binary_linux_build:
|
||||
name: binary_linux_conda_2_7_cpu_devtoolset7_build
|
||||
build_environment: "conda 2.7 cpu devtoolset7"
|
||||
requires:
|
||||
- setup
|
||||
docker_image: "pytorch/conda-cuda"
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
# This binary build is currently broken, see https://github_com/pytorch/pytorch/issues/16710
|
||||
# - binary_linux_conda_3_6_cu90_devtoolset7_build
|
||||
# TODO rename to remove python version for libtorch
|
||||
- binary_linux_build:
|
||||
name: binary_linux_libtorch_2_7m_cpu_devtoolset7_shared-with-deps_build
|
||||
build_environment: "libtorch 2.7m cpu devtoolset7"
|
||||
name: binary_linux_libtorch_3_7m_cpu_devtoolset7_shared-with-deps_build
|
||||
build_environment: "libtorch 3.7m cpu devtoolset7"
|
||||
requires:
|
||||
- setup
|
||||
libtorch_variant: "shared-with-deps"
|
||||
docker_image: "pytorch/manylinux-cuda102"
|
||||
- binary_linux_build:
|
||||
name: binary_linux_libtorch_2_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_build
|
||||
build_environment: "libtorch 2.7m cpu gcc5.4_cxx11-abi"
|
||||
name: binary_linux_libtorch_3_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_build
|
||||
build_environment: "libtorch 3.7m cpu gcc5.4_cxx11-abi"
|
||||
requires:
|
||||
- setup
|
||||
libtorch_variant: "shared-with-deps"
|
||||
@ -48,45 +39,30 @@
|
||||
# TODO we should test a libtorch cuda build, but they take too long
|
||||
# - binary_linux_libtorch_2_7m_cu90_devtoolset7_static-without-deps_build
|
||||
- binary_mac_build:
|
||||
name: binary_macos_wheel_3_6_cpu_build
|
||||
build_environment: "wheel 3.6 cpu"
|
||||
requires:
|
||||
- setup
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- binary_mac_build:
|
||||
name: binary_macos_conda_2_7_cpu_build
|
||||
build_environment: "conda 2.7 cpu"
|
||||
name: binary_macos_wheel_3_7_cpu_build
|
||||
build_environment: "wheel 3.7 cpu"
|
||||
requires:
|
||||
- setup
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
# This job has an average run time of 3 hours o.O
|
||||
# Now only running this on master to reduce overhead
|
||||
# TODO rename to remove python version for libtorch
|
||||
- binary_mac_build:
|
||||
name: binary_macos_libtorch_2_7_cpu_build
|
||||
build_environment: "libtorch 2.7 cpu"
|
||||
name: binary_macos_libtorch_3_7_cpu_build
|
||||
build_environment: "libtorch 3.7 cpu"
|
||||
requires:
|
||||
- setup
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- binary_linux_test:
|
||||
name: binary_linux_manywheel_2_7mu_cpu_devtoolset7_test
|
||||
build_environment: "manywheel 2.7mu cpu devtoolset7"
|
||||
requires:
|
||||
- setup
|
||||
- binary_linux_manywheel_2_7mu_cpu_devtoolset7_build
|
||||
docker_image: "pytorch/manylinux-cuda102"
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
- binary_linux_test:
|
||||
name: binary_linux_manywheel_3_7m_cu102_devtoolset7_test
|
||||
build_environment: "manywheel 3.7m cu102 devtoolset7"
|
||||
@ -100,29 +76,25 @@
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- binary_linux_test:
|
||||
name: binary_linux_conda_2_7_cpu_devtoolset7_test
|
||||
build_environment: "conda 2.7 cpu devtoolset7"
|
||||
requires:
|
||||
- setup
|
||||
- binary_linux_conda_2_7_cpu_devtoolset7_build
|
||||
docker_image: "pytorch/conda-cuda"
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
# This binary build is currently broken, see https://github_com/pytorch/pytorch/issues/16710
|
||||
# - binary_linux_conda_3_6_cu90_devtoolset7_test:
|
||||
# TODO rename to remove python version for libtorch
|
||||
- binary_linux_test:
|
||||
name: binary_linux_libtorch_2_7m_cpu_devtoolset7_shared-with-deps_test
|
||||
build_environment: "libtorch 2.7m cpu devtoolset7"
|
||||
name: binary_linux_libtorch_3_7m_cpu_devtoolset7_shared-with-deps_test
|
||||
build_environment: "libtorch 3.7m cpu devtoolset7"
|
||||
requires:
|
||||
- setup
|
||||
- binary_linux_libtorch_2_7m_cpu_devtoolset7_shared-with-deps_build
|
||||
- binary_linux_libtorch_3_7m_cpu_devtoolset7_shared-with-deps_build
|
||||
libtorch_variant: "shared-with-deps"
|
||||
docker_image: "pytorch/manylinux-cuda102"
|
||||
- binary_linux_test:
|
||||
name: binary_linux_libtorch_2_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_test
|
||||
build_environment: "libtorch 2.7m cpu gcc5.4_cxx11-abi"
|
||||
name: binary_linux_libtorch_3_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_test
|
||||
build_environment: "libtorch 3.7m cpu gcc5.4_cxx11-abi"
|
||||
requires:
|
||||
- setup
|
||||
- binary_linux_libtorch_2_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_build
|
||||
- binary_linux_libtorch_3_7m_cpu_gcc5_4_cxx11-abi_shared-with-deps_build
|
||||
libtorch_variant: "shared-with-deps"
|
||||
docker_image: "pytorch/pytorch-binary-docker-image-ubuntu16.04:latest"
|
||||
|
||||
|
@ -20,21 +20,12 @@
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
|
||||
image_name: "pytorch-linux-xenial-cuda10.2-cudnn7-py3-gcc7"
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-cuda9-cudnn7-py2"
|
||||
image_name: "pytorch-linux-xenial-cuda9-cudnn7-py2"
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-cuda9-cudnn7-py3"
|
||||
image_name: "pytorch-linux-xenial-cuda9-cudnn7-py3"
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7"
|
||||
image_name: "pytorch-linux-xenial-cuda9.2-cudnn7-py3-gcc7"
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-py2.7.9"
|
||||
image_name: "pytorch-linux-xenial-py2.7.9"
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-py2.7"
|
||||
image_name: "pytorch-linux-xenial-py2.7"
|
||||
- docker_build_job:
|
||||
name: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
|
||||
image_name: "pytorch-linux-xenial-py3-clang5-android-ndk-r19c"
|
||||
|
@ -4,6 +4,8 @@
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
requires:
|
||||
- pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build
|
||||
|
||||
@ -13,6 +15,8 @@
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
requires:
|
||||
- pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_32_build
|
||||
- pytorch_linux_xenial_py3_clang5_android_ndk_r19c_x86_64_build
|
||||
|
@ -31,6 +31,7 @@
|
||||
only:
|
||||
- master
|
||||
- /ci-all\/.*/
|
||||
- /release\/.*/
|
||||
build_environment: "pytorch-linux-xenial-py3-clang5-mobile-code-analysis"
|
||||
build_only: "1"
|
||||
# Use LLVM-DEV toolchain in android-ndk-r19c docker image
|
||||
|
40
.github/workflows/lint.yml
vendored
40
.github/workflows/lint.yml
vendored
@ -81,44 +81,6 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
flake8-py2:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: 2.x
|
||||
architecture: x64
|
||||
- name: Fetch PyTorch
|
||||
uses: actions/checkout@v1
|
||||
- name: Checkout PR tip
|
||||
run: |
|
||||
set -eux
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
# We are on a PR, so actions/checkout leaves us on a merge commit.
|
||||
# Check out the actual tip of the branch.
|
||||
git checkout ${{ github.event.pull_request.head.sha }}
|
||||
fi
|
||||
echo ::set-output name=commit_sha::$(git rev-parse HEAD)
|
||||
id: get_pr_tip
|
||||
- name: Run flake8
|
||||
run: |
|
||||
set -eux
|
||||
pip install flake8
|
||||
rm -rf .circleci tools/clang_format_new.py
|
||||
flake8 --exit-zero > ${GITHUB_WORKSPACE}/flake8-output.txt
|
||||
cat ${GITHUB_WORKSPACE}/flake8-output.txt
|
||||
- name: Add annotations
|
||||
uses: pytorch/add-annotations-github-action@master
|
||||
with:
|
||||
check_name: 'flake8-py2'
|
||||
linter_output_path: 'flake8-output.txt'
|
||||
commit_sha: ${{ steps.get_pr_tip.outputs.commit_sha }}
|
||||
regex: '^(?<filename>.*?):(?<lineNumber>\d+):(?<columnNumber>\d+): (?<errorCode>\w\d+) (?<errorDesc>.*)'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
|
||||
clang-tidy:
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-latest
|
||||
@ -198,6 +160,8 @@ jobs:
|
||||
-g"-torch/csrc/jit/export.cpp" \
|
||||
-g"-torch/csrc/jit/import.cpp" \
|
||||
-g"-torch/csrc/jit/netdef_converter.cpp" \
|
||||
-g"-torch/csrc/cuda/nccl.*" \
|
||||
-g"-torch/csrc/cuda/python_nccl.cpp" \
|
||||
"$@" > ${GITHUB_WORKSPACE}/clang-tidy-output.txt
|
||||
|
||||
cat ${GITHUB_WORKSPACE}/clang-tidy-output.txt
|
||||
|
@ -167,7 +167,7 @@ fi
|
||||
|
||||
# Patch required to build xla
|
||||
if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
|
||||
git clone --recursive https://github.com/pytorch/xla.git
|
||||
git clone --recursive -b r1.5 https://github.com/pytorch/xla.git
|
||||
./xla/scripts/apply_patches.sh
|
||||
fi
|
||||
|
||||
|
@ -13,12 +13,12 @@ mkdir -p ${WORKSPACE_DIR}
|
||||
# If a local installation of conda doesn't exist, we download and install conda
|
||||
if [ ! -d "${WORKSPACE_DIR}/miniconda3" ]; then
|
||||
mkdir -p ${WORKSPACE_DIR}
|
||||
curl --retry 3 https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o ${WORKSPACE_DIR}/miniconda3.sh
|
||||
curl --retry 3 https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o ${WORKSPACE_DIR}/miniconda3.sh
|
||||
retry bash ${WORKSPACE_DIR}/miniconda3.sh -b -p ${WORKSPACE_DIR}/miniconda3
|
||||
fi
|
||||
export PATH="${WORKSPACE_DIR}/miniconda3/bin:$PATH"
|
||||
source ${WORKSPACE_DIR}/miniconda3/bin/activate
|
||||
retry conda install -y mkl mkl-include numpy pyyaml setuptools cmake cffi ninja
|
||||
retry conda install -y mkl mkl-include numpy pyyaml=5.3 setuptools=46.0.0 cmake cffi ninja
|
||||
|
||||
# The torch.hub tests make requests to GitHub.
|
||||
#
|
||||
|
@ -20,7 +20,7 @@ if [ -n "${IN_CIRCLECI}" ]; then
|
||||
sudo apt-get install -y --allow-downgrades --allow-change-held-packages libnccl-dev=2.5.6-1+cuda10.1 libnccl2=2.5.6-1+cuda10.1
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda10.1-cudnn7-py3* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
||||
|
@ -21,7 +21,7 @@ if [ -n "${IN_CIRCLECI}" ]; then
|
||||
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages libnccl-dev=2.5.6-1+cuda10.1 libnccl2=2.5.6-1+cuda10.1
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda9-cudnn7-py2* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-xenial-cuda10.1-cudnn7-py3* ]]; then
|
||||
# TODO: move this to Docker
|
||||
sudo apt-get -qq update
|
||||
sudo apt-get -qq install --allow-downgrades --allow-change-held-packages openmpi-bin libopenmpi-dev
|
||||
@ -244,7 +244,7 @@ test_backward_compatibility() {
|
||||
pushd test/backward_compatibility
|
||||
python dump_all_function_schemas.py --filename new_schemas.txt
|
||||
pip_uninstall torch
|
||||
pip_install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
|
||||
pip_install torch==1.4.0+cpu torchvision==0.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
python check_backward_compatibility.py --new-schemas new_schemas.txt
|
||||
popd
|
||||
set +x
|
||||
|
@ -5,7 +5,7 @@ if "%BUILD_ENVIRONMENT%"=="" (
|
||||
)
|
||||
if "%REBUILD%"=="" (
|
||||
IF EXIST %CONDA_PARENT_DIR%\Miniconda3 ( rd /s /q %CONDA_PARENT_DIR%\Miniconda3 )
|
||||
curl --retry 3 -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
|
||||
curl --retry 3 -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
|
||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
||||
)
|
||||
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
||||
|
@ -13,7 +13,7 @@ if "%BUILD_ENVIRONMENT%"=="" (
|
||||
)
|
||||
if NOT "%BUILD_ENVIRONMENT%"=="" (
|
||||
IF EXIST %CONDA_PARENT_DIR%\Miniconda3 ( rd /s /q %CONDA_PARENT_DIR%\Miniconda3 )
|
||||
curl --retry 3 https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
|
||||
curl --retry 3 https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe --output %TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe
|
||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
||||
)
|
||||
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
||||
|
@ -160,20 +160,18 @@ ENDIF(BLAS_FOUND)
|
||||
|
||||
IF(LAPACK_FOUND)
|
||||
list(APPEND ATen_CPU_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
|
||||
if(USE_CUDA)
|
||||
if(USE_CUDA AND MSVC)
|
||||
# Although Lapack provides CPU (and thus, one might expect that ATen_cuda
|
||||
# would not need this at all), some of our libraries (magma in particular)
|
||||
# backend to CPU BLAS/LAPACK implementations, and so it is very important
|
||||
# we get the *right* implementation, because even if the symbols are the
|
||||
# same, LAPACK implementions may have different calling conventions.
|
||||
# This caused https://github.com/pytorch/pytorch/issues/7353
|
||||
#
|
||||
# We do NOT do this on Linux, since we just rely on torch_cpu to
|
||||
# provide all of the symbols we need
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
if(USE_ROCM)
|
||||
# It's not altogether clear that HIP behaves the same way, but it
|
||||
# seems safer to assume that it needs it too
|
||||
list(APPEND ATen_HIP_DEPENDENCY_LIBS ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
ENDIF(LAPACK_FOUND)
|
||||
|
||||
IF (UNIX AND NOT APPLE)
|
||||
@ -331,8 +329,12 @@ IF(USE_CUDA AND NOT USE_ROCM)
|
||||
IF(USE_MAGMA)
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS ${MAGMA_LIBRARIES})
|
||||
IF ($ENV{TH_BINARY_BUILD})
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
|
||||
"${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
|
||||
IF (MSVC)
|
||||
# Do not do this on Linux: see Note [Extra MKL symbols for MAGMA in torch_cpu]
|
||||
# in caffe2/CMakeLists.txt
|
||||
list(APPEND ATen_CUDA_DEPENDENCY_LIBS
|
||||
"${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}")
|
||||
ENDIF(MSVC)
|
||||
ENDIF($ENV{TH_BINARY_BUILD})
|
||||
ENDIF(USE_MAGMA)
|
||||
IF ($ENV{ATEN_STATIC_CUDA})
|
||||
|
@ -125,13 +125,15 @@ void _parallel_run(
|
||||
std::tie(num_tasks, chunk_size) =
|
||||
internal::calc_num_tasks_and_chunk_size(begin, end, grain_size);
|
||||
|
||||
std::atomic_flag err_flag = ATOMIC_FLAG_INIT;
|
||||
std::exception_ptr eptr;
|
||||
std::vector<std::shared_ptr<c10::ivalue::Future>> futures(num_tasks);
|
||||
for (size_t task_id = 0; task_id < num_tasks; ++task_id) {
|
||||
futures[task_id] = std::make_shared<c10::ivalue::Future>(c10::NoneType::get());
|
||||
}
|
||||
auto task = [f, &eptr, &err_flag, &futures, begin, end, chunk_size]
|
||||
struct {
|
||||
std::atomic_flag err_flag = ATOMIC_FLAG_INIT;
|
||||
std::exception_ptr eptr;
|
||||
std::mutex mutex;
|
||||
volatile size_t remaining;
|
||||
std::condition_variable cv;
|
||||
} state;
|
||||
|
||||
auto task = [f, &state, begin, end, chunk_size]
|
||||
(int /* unused */, size_t task_id) {
|
||||
int64_t local_start = begin + task_id * chunk_size;
|
||||
if (local_start < end) {
|
||||
@ -140,21 +142,30 @@ void _parallel_run(
|
||||
ParallelRegionGuard guard(task_id);
|
||||
f(local_start, local_end, task_id);
|
||||
} catch (...) {
|
||||
if (!err_flag.test_and_set()) {
|
||||
eptr = std::current_exception();
|
||||
if (!state.err_flag.test_and_set()) {
|
||||
state.eptr = std::current_exception();
|
||||
}
|
||||
}
|
||||
}
|
||||
futures[task_id]->markCompleted();
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(state.mutex);
|
||||
if (--state.remaining == 0) {
|
||||
state.cv.notify_one();
|
||||
}
|
||||
}
|
||||
};
|
||||
state.remaining = num_tasks;
|
||||
_run_with_pool(task, num_tasks);
|
||||
|
||||
// Wait for all tasks to finish.
|
||||
for (size_t task_id = 0; task_id < num_tasks; ++task_id) {
|
||||
futures[task_id]->wait();
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(state.mutex);
|
||||
if (state.remaining != 0) {
|
||||
state.cv.wait(lk);
|
||||
}
|
||||
}
|
||||
if (eptr) {
|
||||
std::rethrow_exception(eptr);
|
||||
if (state.eptr) {
|
||||
std::rethrow_exception(state.eptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,14 +16,6 @@
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
|
||||
#if defined(__clang__)
|
||||
#define __ubsan_ignore_float_divide_by_zero__ __attribute__((no_sanitize("float-divide-by-zero")))
|
||||
#define __ubsan_ignore_vptr__ __attribute__((no_sanitize("vptr")))
|
||||
#else
|
||||
#define __ubsan_ignore_float_divide_by_zero__
|
||||
#define __ubsan_ignore_vptr__
|
||||
#endif
|
||||
|
||||
#define AT_DISALLOW_COPY_AND_ASSIGN(TypeName) \
|
||||
TypeName(const TypeName&) = delete; \
|
||||
void operator=(const TypeName&) = delete
|
||||
|
@ -20,6 +20,10 @@ void registerCustomClass(at::ClassTypePtr class_type) {
|
||||
}
|
||||
|
||||
at::ClassTypePtr getCustomClass(const std::string& name) {
|
||||
// BC hack so we can upgrade a binary internally
|
||||
if (name == "__torch__.torch.classes.SentencePiece") {
|
||||
return getCustomClass("__torch__.torch.classes.fb.SentencePiece");
|
||||
}
|
||||
return customClasses().count(name) ? customClasses()[name] : nullptr;
|
||||
}
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <c10/util/math_compat.h>
|
||||
#include <ATen/native/cpu/zmath.h>
|
||||
#include <c10/util/TypeCast.h>
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define __at_align32__ __attribute__((aligned(32)))
|
||||
|
@ -145,7 +145,7 @@ private:
|
||||
|
||||
std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d);
|
||||
|
||||
class FilterDescriptor
|
||||
class TORCH_CUDA_API FilterDescriptor
|
||||
: public Descriptor<cudnnFilterStruct,
|
||||
&cudnnCreateFilterDescriptor,
|
||||
&cudnnDestroyFilterDescriptor>
|
||||
|
@ -138,6 +138,10 @@ Tensor true_divide(const Tensor& self, const Tensor& divisor) {
|
||||
return iter.output();
|
||||
}
|
||||
|
||||
Tensor& true_divide_(Tensor& self, const Tensor& divisor) {
|
||||
return native::true_divide_out(self, self, divisor);
|
||||
}
|
||||
|
||||
Tensor& floor_divide_out(Tensor& result, const Tensor& self, const Tensor& other) {
|
||||
auto iter = TensorIterator::binary_op(result, self, other,
|
||||
/*check_mem_overlap=*/true);
|
||||
@ -731,7 +735,11 @@ Tensor& fmod_(Tensor& self, Scalar other) {
|
||||
}
|
||||
|
||||
Tensor true_divide(const Tensor& self, Scalar divisor) {
|
||||
return at::true_divide(self, wrapped_scalar_tensor(divisor)); // redispatch!
|
||||
return self.true_divide(wrapped_scalar_tensor(divisor)); // redispatch!
|
||||
}
|
||||
|
||||
Tensor& true_divide_(Tensor& self, Scalar divisor) {
|
||||
return self.true_divide_(wrapped_scalar_tensor(divisor)); // redispatch!
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -70,8 +70,8 @@ struct CAFFE2_API DispatchStub<rT (*)(Args...), T> {
|
||||
// they will still compute the same value for cpu_dispatch_ptr.
|
||||
if (!cpu_dispatch_ptr.load(std::memory_order_relaxed)) {
|
||||
FnPtr tmp_cpu_dispatch_ptr = nullptr;
|
||||
cpu_dispatch_ptr.compare_exchange_weak(
|
||||
tmp_cpu_dispatch_ptr, choose_cpu_impl(), std::memory_order_relaxed);
|
||||
while(!cpu_dispatch_ptr.compare_exchange_weak(
|
||||
tmp_cpu_dispatch_ptr, choose_cpu_impl(), std::memory_order_relaxed));
|
||||
}
|
||||
return (*cpu_dispatch_ptr)(std::forward<ArgTypes>(args)...);
|
||||
} else if (device_type == DeviceType::CUDA) {
|
||||
|
@ -533,7 +533,7 @@ Tensor frobenius_norm(const Tensor& self, IntArrayRef dim, bool keepdim) {
|
||||
return at::norm(self, 2, dim, keepdim, self.scalar_type());
|
||||
}
|
||||
if (self.is_complex()){
|
||||
return at::sqrt(at::sum((self.conj() * self).real(), dim, keepdim));
|
||||
return at::sqrt(at::sum(at::real(self.conj() * self), dim, keepdim));
|
||||
} else {
|
||||
return at::sqrt(at::sum((self * self), dim, keepdim));
|
||||
}
|
||||
@ -553,7 +553,7 @@ Tensor &frobenius_norm_out(
|
||||
return at::norm_out(result, self, 2, dim, keepdim, self.scalar_type());
|
||||
}
|
||||
if (self.is_complex()){
|
||||
return at::sqrt_out(result, at::sum((self.conj() * self).real(), dim, keepdim));
|
||||
return at::sqrt_out(result, at::sum(at::real(self.conj() * self), dim, keepdim));
|
||||
} else {
|
||||
return at::sqrt_out(result, at::sum((self * self), dim, keepdim));
|
||||
}
|
||||
|
@ -799,7 +799,7 @@ static Tensor &std_var_out(Tensor &result, const Tensor &self, IntArrayRef dim,
|
||||
|
||||
if (at::isComplexType(self.scalar_type())){
|
||||
ScalarType dtype = c10::toValueType(get_dtype(result, self, {}, true));
|
||||
Tensor real_in = self.real().to(dtype);
|
||||
Tensor real_in = at::real(self).to(dtype);
|
||||
Tensor real_out = at::empty({0}, self.options().dtype(dtype));
|
||||
auto iter = make_reduction("std or var", real_out, real_in, dim, keepdim, dtype);
|
||||
if (iter.numel() == 0) {
|
||||
@ -807,7 +807,7 @@ static Tensor &std_var_out(Tensor &result, const Tensor &self, IntArrayRef dim,
|
||||
} else {
|
||||
std_var_stub(iter.device_type(), iter, unbiased, false);
|
||||
}
|
||||
Tensor imag_in = self.imag().to(dtype);
|
||||
Tensor imag_in = at::imag(self).to(dtype);
|
||||
Tensor imag_out = at::empty({0}, self.options().dtype(dtype));
|
||||
iter = make_reduction("std or var", imag_out, imag_in, dim, keepdim, dtype);
|
||||
if (iter.numel() == 0) {
|
||||
@ -845,7 +845,7 @@ static std::tuple<Tensor&,Tensor&> std_var_mean_out(const char* fname, Tensor &r
|
||||
".");
|
||||
if (at::isComplexType(self.scalar_type())){
|
||||
ScalarType dtype = c10::toValueType(get_dtype(result1, self, {}, true));
|
||||
Tensor real_in = self.real().to(dtype);
|
||||
Tensor real_in = at::real(self).to(dtype);
|
||||
Tensor real_out_var = at::empty({0}, self.options().dtype(dtype));
|
||||
Tensor real_out_mean = at::empty({0}, self.options().dtype(dtype));
|
||||
auto iter = make_reduction(fname, real_out_var, real_out_mean, real_in, dim, keepdim, dtype);
|
||||
@ -855,7 +855,7 @@ static std::tuple<Tensor&,Tensor&> std_var_mean_out(const char* fname, Tensor &r
|
||||
} else {
|
||||
std_var_stub(iter.device_type(), iter, unbiased, false);
|
||||
}
|
||||
Tensor imag_in = self.imag().to(dtype);
|
||||
Tensor imag_in = at::imag(self).to(dtype);
|
||||
Tensor imag_out_var = at::empty({0}, self.options().dtype(dtype));
|
||||
Tensor imag_out_mean = at::empty({0}, self.options().dtype(dtype));
|
||||
iter = make_reduction(fname, imag_out_var, imag_out_mean, imag_in, dim, keepdim, dtype);
|
||||
|
@ -33,7 +33,7 @@ static inline Tensor to_impl(const Tensor& self, const TensorOptions& options, b
|
||||
if (self.is_non_overlapping_and_dense()) {
|
||||
// Copy all strides
|
||||
auto r = at::empty_strided(self.sizes(), self.strides(), options.memory_format(c10::nullopt));
|
||||
r.copy_(self);
|
||||
r.copy_(self, non_blocking);
|
||||
return r;
|
||||
} else {
|
||||
memory_format = self.suggest_memory_format();
|
||||
|
@ -99,7 +99,7 @@ Tensor _dim_arange(const Tensor& like, int64_t dim) {
|
||||
|
||||
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
Tensor empty_cpu(IntArrayRef size, const TensorOptions& options_, c10::optional<c10::MemoryFormat> optional_memory_format) {
|
||||
|
||||
TORCH_CHECK(!isComplexType(at::typeMetaToScalarType(options_.dtype())), "Complex dtype not supported.");
|
||||
TORCH_CHECK(
|
||||
!(options_.has_memory_format() && optional_memory_format.has_value()),
|
||||
"Cannot set memory_format both in TensorOptions and explicit argument; please delete "
|
||||
|
@ -98,6 +98,15 @@ Tensor & _cat_out_cpu(Tensor& result, TensorList tensors, int64_t dim) {
|
||||
"output memory locations. Found overlap in input tensor ", i);
|
||||
}
|
||||
|
||||
// Dtypes should be the same
|
||||
const auto first_in_cat = tensors[0];
|
||||
for (int64_t i = 1; i < tensors.size(); i++) {
|
||||
TORCH_CHECK(first_in_cat.dtype() == tensors[i].dtype(),
|
||||
"Expected object of scalar type ", first_in_cat.dtype(),
|
||||
" but got scalar type ", tensors[i].dtype(),
|
||||
" for sequence element ", i, ".");
|
||||
}
|
||||
|
||||
auto should_skip = [](const Tensor& t) { return t.numel() == 0 && t.dim() == 1; };
|
||||
for (auto const &tensor : tensors) {
|
||||
if (should_skip(tensor)) {
|
||||
|
@ -73,11 +73,17 @@ Tensor& abs_(Tensor& self) { return unary_op_impl_(self, at::abs_out); }
|
||||
Tensor& angle_out(Tensor& result, const Tensor& self) { return unary_op_impl_out(result, self, angle_stub); }
|
||||
Tensor angle(const Tensor& self) { return unary_op_impl(self, at::angle_out); }
|
||||
|
||||
Tensor& real_out(Tensor& result, const Tensor& self) { return unary_op_impl_out(result, self, real_stub); }
|
||||
Tensor real(const Tensor& self) { return unary_op_impl(self, at::real_out); }
|
||||
Tensor real(const Tensor& self) {
|
||||
TORCH_CHECK(!self.is_complex(), "real is not yet implemented for complex tensors.");
|
||||
return self;
|
||||
}
|
||||
|
||||
Tensor& imag_out(Tensor& result, const Tensor& self) { return unary_op_impl_out(result, self, imag_stub); }
|
||||
Tensor imag(const Tensor& self) { return unary_op_impl(self, at::imag_out); }
|
||||
Tensor imag(const Tensor& self) {
|
||||
TORCH_CHECK(false, "imag is not yet implemented.");
|
||||
|
||||
// Note: unreachable
|
||||
return at::zeros_like(self);
|
||||
}
|
||||
|
||||
Tensor& conj_out(Tensor& result, const Tensor& self) { return unary_op_impl_out(result, self, conj_stub); }
|
||||
Tensor conj(const Tensor& self) { return unary_op_impl(self, at::conj_out); }
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <ATen/native/TensorIterator.h>
|
||||
#include <ATen/native/BinaryOps.h>
|
||||
#include <ATen/native/cpu/Loops.h>
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
namespace at { namespace native {
|
||||
namespace {
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <ATen/native/cuda/zmath.cuh>
|
||||
#include <ATen/native/TensorIterator.h>
|
||||
#include <ATen/native/BinaryOps.h>
|
||||
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
// NOTE: CUDA on Windows requires that the enclosing function
|
||||
// of a __device__ lambda not have internal linkage.
|
||||
|
@ -358,7 +358,7 @@ void max_pool2d_with_indices_out_cuda_template(
|
||||
|
||||
Tensor input = input_.contiguous(memory_format);
|
||||
|
||||
const int64_t in_stride_n = input.stride(-4);
|
||||
const int64_t in_stride_n = input_.ndimension() == 4 ? input.stride(-4) : 0;
|
||||
const int64_t in_stride_c = input.stride(-3);
|
||||
const int64_t in_stride_h = input.stride(-2);
|
||||
const int64_t in_stride_w = input.stride(-1);
|
||||
@ -506,7 +506,7 @@ void max_pool2d_with_indices_backward_out_cuda_template(
|
||||
const int64_t inputHeight = input.size(-2);
|
||||
const int64_t inputWidth = input.size(-1);
|
||||
|
||||
const int64_t in_stride_n = input.stride(-4);
|
||||
const int64_t in_stride_n = input.ndimension() == 4 ? input.stride(-4) : 0;
|
||||
const int64_t in_stride_c = input.stride(-3);
|
||||
const int64_t in_stride_h = input.stride(-2);
|
||||
const int64_t in_stride_w = input.stride(-1);
|
||||
|
@ -198,7 +198,7 @@ void index_put_accum_kernel(Tensor & self, TensorList indices, const Tensor & va
|
||||
using device_ptr = thrust::device_ptr<int64_t>;
|
||||
const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
|
||||
|
||||
linearIndex.div_(sliceSize);
|
||||
linearIndex.floor_divide_(sliceSize);
|
||||
{
|
||||
sorted_indices.copy_(linearIndex);
|
||||
auto allocator = THCThrustAllocator(globalContext().lazyInitCUDA());
|
||||
|
@ -307,6 +307,15 @@ Tensor& cat_out_cuda(Tensor& out, TensorList inputs, int64_t dimension) {
|
||||
"tensor ", i);
|
||||
}
|
||||
|
||||
// Dtypes should be the same
|
||||
const auto first_in_cat = inputs[0];
|
||||
for (int64_t i = 1; i < inputs.size(); i++) {
|
||||
TORCH_CHECK(first_in_cat.dtype() == inputs[i].dtype(),
|
||||
"Expected object of scalar type ", first_in_cat.dtype(),
|
||||
" but got scalar type ", inputs[i].dtype(),
|
||||
" for sequence element ", i, ".");
|
||||
}
|
||||
|
||||
for (int i = 0; i < inputs.size(); i++)
|
||||
{
|
||||
if (should_skip(inputs[i])) {
|
||||
@ -325,6 +334,12 @@ Tensor& cat_out_cuda(Tensor& out, TensorList inputs, int64_t dimension) {
|
||||
TORCH_CHECK(inputs.size() > 0, "invalid number of inputs ", inputs.size());
|
||||
TORCH_CHECK(dimension >= 0, "invalid dimension ", dimension);
|
||||
|
||||
for (const Tensor& t: inputs) {
|
||||
TORCH_CHECK(t.device() == notSkippedTensor->device(),
|
||||
"All input tensors must be on the same device. Received ",
|
||||
t.device(), " and ", notSkippedTensor->device());
|
||||
}
|
||||
|
||||
c10::MemoryFormat memory_format = compute_output_memory_format(inputs);
|
||||
|
||||
std::vector<int64_t> size(notSkippedTensor->sizes().vec());
|
||||
@ -355,17 +370,11 @@ Tensor& cat_out_cuda(Tensor& out, TensorList inputs, int64_t dimension) {
|
||||
// 4. The number of dimensions is <= 4
|
||||
// 5. All input tensors are contiguous (output tensor may be non-contig)
|
||||
// 6. All input tensors can use 32-bit indexing
|
||||
// 7. All input tensors are on the same device
|
||||
|
||||
const bool all32BitIndexable = std::all_of(inputs.begin(), inputs.end(),
|
||||
[] (const Tensor& t) {
|
||||
return at::cuda::detail::canUse32BitIndexMath(t);
|
||||
});
|
||||
Device firstDevice = notSkippedTensor->device();
|
||||
const bool allSameDevice = std::all_of(inputs.begin(), inputs.end(),
|
||||
[firstDevice](const Tensor& t) {
|
||||
return t.device() == firstDevice;
|
||||
});
|
||||
const bool allContiguous = std::all_of(inputs.begin(), inputs.end(),
|
||||
[=](const Tensor& t) {
|
||||
return !t.defined() || t.is_contiguous(memory_format);
|
||||
@ -375,8 +384,7 @@ Tensor& cat_out_cuda(Tensor& out, TensorList inputs, int64_t dimension) {
|
||||
out.dim() <= CAT_ARRAY_MAX_INPUT_DIMS &&
|
||||
at::cuda::detail::canUse32BitIndexMath(out) &&
|
||||
allContiguous &&
|
||||
all32BitIndexable &&
|
||||
allSameDevice) {
|
||||
all32BitIndexable) {
|
||||
|
||||
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(
|
||||
at::ScalarType::Half, at::ScalarType::Bool, at::ScalarType::BFloat16,
|
||||
|
@ -125,7 +125,7 @@ struct TopKTypeConfig<at::Half> {
|
||||
static inline __device__ RadixType convert(at::Half v) {
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_HCC__)
|
||||
RadixType x = __half_as_ushort(v);
|
||||
RadixType mask = -((x >> 15)) | 0x8000;
|
||||
RadixType mask = (x & 0x00008000) ? 0x0000ffff : 0x00008000;
|
||||
return (v == v) ? (x ^ mask) : 0xffff;
|
||||
#else
|
||||
assert(false);
|
||||
@ -135,7 +135,7 @@ struct TopKTypeConfig<at::Half> {
|
||||
|
||||
static inline __device__ at::Half deconvert(RadixType v) {
|
||||
#if defined(__CUDA_ARCH__) || defined(__HIP_PLATFORM_HCC__)
|
||||
RadixType mask = ((v >> 15) - 1) | 0x8000;
|
||||
RadixType mask = (v & 0x00008000) ? 0x00008000 : 0x0000ffff;
|
||||
return __ushort_as_half(v ^ mask);
|
||||
#else
|
||||
assert(false);
|
||||
|
@ -44,6 +44,7 @@ Tensor& eye_out_cuda(Tensor& result, int64_t n, int64_t m) {
|
||||
}
|
||||
|
||||
Tensor empty_cuda(IntArrayRef size, const TensorOptions& options, c10::optional<MemoryFormat> optional_memory_format) {
|
||||
TORCH_CHECK(!isComplexType(at::typeMetaToScalarType(options.dtype())), "Complex dtype not supported.");
|
||||
AT_ASSERT(options.device().type() == at::DeviceType::CUDA);
|
||||
TORCH_INTERNAL_ASSERT(impl::variable_excluded_from_dispatch());
|
||||
TORCH_CHECK(!options.pinned_memory(), "Only dense CPU tensors can be pinned");
|
||||
|
@ -238,18 +238,12 @@
|
||||
|
||||
- func: real(Tensor self) -> Tensor
|
||||
use_c10_dispatcher: full
|
||||
variants: function, method
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: real.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
||||
variants: function
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: imag(Tensor self) -> Tensor
|
||||
use_c10_dispatcher: full
|
||||
variants: function, method
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: imag.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
|
||||
variants: function
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: conj(Tensor self) -> Tensor
|
||||
@ -2872,7 +2866,7 @@
|
||||
|
||||
- func: true_divide.Tensor(Tensor self, Tensor other) -> Tensor
|
||||
use_c10_dispatcher: full
|
||||
variants: function
|
||||
variants: function, method
|
||||
dispatch:
|
||||
CPU: true_divide
|
||||
CUDA: true_divide
|
||||
@ -2880,6 +2874,15 @@
|
||||
SparseCUDA: true_divide_sparse
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
|
||||
variants: method
|
||||
dispatch:
|
||||
CPU: true_divide_
|
||||
CUDA: true_divide_
|
||||
SparseCPU: true_divide_sparse_
|
||||
SparseCUDA: true_divide_sparse_
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: true_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
|
||||
dispatch:
|
||||
CPU: true_divide_out
|
||||
@ -2890,7 +2893,11 @@
|
||||
|
||||
- func: true_divide.Scalar(Tensor self, Scalar other) -> Tensor
|
||||
use_c10_dispatcher: full
|
||||
variants: function
|
||||
variants: function, method
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
|
||||
variants: method
|
||||
supports_named_tensor: True
|
||||
|
||||
- func: trunc(Tensor self) -> Tensor
|
||||
|
@ -272,6 +272,10 @@ SparseTensor& true_divide_out_sparse_scalar(
|
||||
return true_divide_out_sparse_zerodim(result, dividend, wrapped_scalar_tensor(divisor));
|
||||
}
|
||||
|
||||
Tensor& true_divide_sparse_(Tensor& self, const Tensor& divisor) {
|
||||
return true_divide_out_sparse_zerodim(self, self, divisor);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// floor_divide(SparseTensor, Scalar)
|
||||
// --------------------------------------------------------------------
|
||||
|
@ -138,7 +138,7 @@ SparseTensor coalesce_sparse_cuda(const SparseTensor& self) {
|
||||
// broadcasting logic; instead, it will blast the elements from one
|
||||
// to the other so long as the numel is the same
|
||||
indicesSlice.copy_(indices1D);
|
||||
indices1D.div_(self.size(d));
|
||||
indices1D.floor_divide_(self.size(d));
|
||||
indicesSlice.add_(indices1D, -self.size(d));
|
||||
}
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ namespace xnnpack {
|
||||
namespace {
|
||||
torch::jit::class_<XNNPackLinearOpContext> register_xnnpack_linear_op_context_class() {
|
||||
static auto register_linear_op_context_class =
|
||||
torch::jit::class_<XNNPackLinearOpContext>("XNNPackLinearOpContext")
|
||||
torch::jit::class_<XNNPackLinearOpContext>("xnnpack", "XNNPackLinearOpContext")
|
||||
.def_pickle(
|
||||
[](const c10::intrusive_ptr<XNNPackLinearOpContext>& op_context)
|
||||
-> SerializationTypeLinearPrePack { // __getstate__
|
||||
@ -38,7 +38,7 @@ torch::jit::class_<XNNPackLinearOpContext> register_xnnpack_linear_op_context_cl
|
||||
|
||||
torch::jit::class_<XNNPackConv2dOpContext> register_xnnpack_conv2d_op_context_class() {
|
||||
static auto register_conv2d_op_context_class =
|
||||
torch::jit::class_<XNNPackConv2dOpContext>("XNNPackConv2dOpContext")
|
||||
torch::jit::class_<XNNPackConv2dOpContext>("xnnpack", "XNNPackConv2dOpContext")
|
||||
.def_pickle(
|
||||
[](const c10::intrusive_ptr<XNNPackConv2dOpContext>& op_context)
|
||||
-> SerializationTypeConv2dPrePack { // __getstate__
|
||||
@ -74,25 +74,25 @@ static auto registry =
|
||||
// Registering under _xnnpack namespace for now. As we add more backend requiring similar functionality
|
||||
// We can refactor the code and use a better namespace.
|
||||
torch::RegisterOperators()
|
||||
.op("_xnnpack::linear_prepack(Tensor W, Tensor? B=None) -> __torch__.torch.classes.XNNPackLinearOpContext",
|
||||
.op("_xnnpack::linear_prepack(Tensor W, Tensor? B=None) -> __torch__.torch.classes.xnnpack.XNNPackLinearOpContext",
|
||||
torch::RegisterOperators::options()
|
||||
.aliasAnalysis(at::AliasAnalysisKind::PURE_FUNCTION)
|
||||
.kernel<internal::linear::LinearPrePack>(
|
||||
DispatchKey::CPUTensorId))
|
||||
.op("_xnnpack::linear_packed(Tensor X, __torch__.torch.classes.XNNPackLinearOpContext W_prepack) -> Tensor Y",
|
||||
.op("_xnnpack::linear_packed(Tensor X, __torch__.torch.classes.xnnpack.XNNPackLinearOpContext W_prepack) -> Tensor Y",
|
||||
torch::RegisterOperators::options()
|
||||
.aliasAnalysis(at::AliasAnalysisKind::PURE_FUNCTION)
|
||||
.kernel<internal::linear::LinearPacked>(
|
||||
DispatchKey::CPUTensorId))
|
||||
.op("_xnnpack::conv2d_prepack(Tensor W, Tensor? B, int[2] stride, "
|
||||
"int[2] padding, int[2] dilation, int groups) "
|
||||
"-> __torch__.torch.classes.XNNPackConv2dOpContext",
|
||||
"-> __torch__.torch.classes.xnnpack.XNNPackConv2dOpContext",
|
||||
torch::RegisterOperators::options()
|
||||
.aliasAnalysis(at::AliasAnalysisKind::PURE_FUNCTION)
|
||||
.kernel<internal::convolution2d::Conv2dPrePack>(
|
||||
DispatchKey::CPUTensorId))
|
||||
.op("_xnnpack::conv2d_packed(Tensor X, "
|
||||
"__torch__.torch.classes.XNNPackConv2dOpContext W_prepack) -> Tensor Y",
|
||||
"__torch__.torch.classes.xnnpack.XNNPackConv2dOpContext W_prepack) -> Tensor Y",
|
||||
torch::RegisterOperators::options()
|
||||
.aliasAnalysis(at::AliasAnalysisKind::PURE_FUNCTION)
|
||||
.kernel<internal::convolution2d::Conv2dPacked>(
|
||||
|
@ -423,6 +423,85 @@ class CAFFE2_API Tensor {
|
||||
|
||||
// ~~~~~ Autograd API ~~~~~
|
||||
|
||||
/// \fn bool is_leaf() const;
|
||||
///
|
||||
/// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention.
|
||||
///
|
||||
/// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were
|
||||
/// created by the user. This means that they are not the result of an operation and so
|
||||
/// `grad_fn()` is `nullptr`.
|
||||
///
|
||||
/// Only leaf Tensors will have their `grad()` populated during a call to `backward()`.
|
||||
/// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`.
|
||||
///
|
||||
/// Example:
|
||||
/// @code
|
||||
/// auto a = torch::rand(10, torch::requires_grad());
|
||||
/// std::cout << a.is_leaf() << std::endl; // prints `true`
|
||||
///
|
||||
/// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA);
|
||||
/// std::cout << b.is_leaf() << std::endl; // prints `false`
|
||||
/// // b was created by the operation that cast a cpu Tensor into a cuda Tensor
|
||||
///
|
||||
/// auto c = torch::rand(10, torch::requires_grad()) + 2;
|
||||
/// std::cout << c.is_leaf() << std::endl; // prints `false`
|
||||
/// // c was created by the addition operation
|
||||
///
|
||||
/// auto d = torch::rand(10).cuda();
|
||||
/// std::cout << d.is_leaf() << std::endl; // prints `true`
|
||||
/// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
|
||||
///
|
||||
/// auto e = torch::rand(10).cuda().requires_grad_();
|
||||
/// std::cout << e.is_leaf() << std::endl; // prints `true`
|
||||
/// // e requires gradients and has no operations creating it
|
||||
///
|
||||
/// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true));
|
||||
/// std::cout << f.is_leaf() << std::endl; // prints `true`
|
||||
/// // f requires grad, has no operation creating it
|
||||
/// @endcode
|
||||
|
||||
/// \fn void backward(const Tensor & gradient={}, bool keep_graph=false, bool create_graph=false) const;
|
||||
///
|
||||
/// Computes the gradient of current tensor with respect to graph leaves.
|
||||
///
|
||||
/// The graph is differentiated using the chain rule. If the tensor is
|
||||
/// non-scalar (i.e. its data has more than one element) and requires
|
||||
/// gradient, the function additionally requires specifying ``gradient``.
|
||||
/// It should be a tensor of matching type and location, that contains
|
||||
/// the gradient of the differentiated function w.r.t. this Tensor.
|
||||
///
|
||||
/// This function accumulates gradients in the leaves - you might need to
|
||||
/// zero them before calling it.
|
||||
///
|
||||
/// \param gradient Gradient w.r.t. the
|
||||
/// tensor. If it is a tensor, it will be automatically converted
|
||||
/// to a Tensor that does not require grad unless ``create_graph`` is True.
|
||||
/// None values can be specified for scalar Tensors or ones that
|
||||
/// don't require grad. If a None value would be acceptable then
|
||||
/// this argument is optional.
|
||||
/// \param keep_graph If ``false``, the graph used to compute
|
||||
/// the grads will be freed. Note that in nearly all cases setting
|
||||
/// this option to True is not needed and often can be worked around
|
||||
/// in a much more efficient way. Defaults to the value of
|
||||
/// ``create_graph``.
|
||||
/// \param create_graph If ``true``, graph of the derivative will
|
||||
/// be constructed, allowing to compute higher order derivative
|
||||
/// products. Defaults to ``false``.
|
||||
|
||||
/// \fn Tensor detach() const;
|
||||
///
|
||||
/// Returns a new Tensor, detached from the current graph.
|
||||
/// The result will never require gradient.
|
||||
|
||||
/// \fn Tensor & detach_() const;
|
||||
///
|
||||
/// Detaches the Tensor from the graph that created it, making it a leaf.
|
||||
/// Views cannot be detached in-place.
|
||||
|
||||
/// \fn void retain_grad() const;
|
||||
///
|
||||
/// Enables .grad() for non-leaf Tensors.
|
||||
|
||||
Tensor& set_requires_grad(bool requires_grad) {
  impl_->set_requires_grad(requires_grad);
  return *this;
@ -431,9 +510,16 @@ class CAFFE2_API Tensor {
  return impl_->requires_grad();
}

/// Return a mutable reference to the gradient. This is conventionally
/// used as `t.grad() = x` to set a gradient to a completely new tensor.
Tensor& grad() {
  return impl_->grad();
}

/// This function returns an undefined tensor by default and returns a defined tensor
/// the first time a call to `backward()` computes gradients for this Tensor.
/// The attribute will then contain the gradients computed and future calls
/// to `backward()` will accumulate (add) gradients into it.
const Tensor& grad() const {
  return impl_->grad();
}
@ -505,11 +591,38 @@ class CAFFE2_API Tensor {
template <typename T>
using hook_return_var_t = std::enable_if_t<std::is_same<typename std::result_of<T&(Tensor)>::type, Tensor>::value, unsigned>;

// Returns the index of the hook in the list which can be used to remove hook
// Register a hook with no return value
/// Registers a backward hook.
///
/// The hook will be called every time a gradient with respect to the Tensor is computed.
/// The hook should have one of the following signatures:
/// ```
/// hook(Tensor grad) -> Tensor
/// ```
/// ```
/// hook(Tensor grad) -> void
/// ```
/// The hook should not modify its argument, but it can optionally return a new gradient
/// which will be used in place of `grad`.
///
/// This function returns the index of the hook in the list, which can be used to remove the hook.
///
/// Example:
/// @code
/// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad());
/// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient
/// v.backward(torch::tensor({1., 2., 3.}));
/// // This prints:
/// // ```
/// // 2
/// // 4
/// // 6
/// // [ CPUFloatType{3} ]
/// // ```
/// std::cout << v.grad() << std::endl;
/// v.remove_hook(h); // removes the hook
/// @endcode
template <typename T>
hook_return_void_t<T> register_hook(T&& hook) const;
// Register a hook with variable return value
template <typename T>
hook_return_var_t<T> register_hook(T&& hook) const;

@ -518,7 +631,7 @@ private:

public:

// Remove hook at given position
/// Remove hook at given position
void remove_hook(unsigned pos) const;

// View Variables
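The `backward()`, `grad()` and `retain_grad()` documentation above describes the usual accumulate-and-read workflow. A minimal usage sketch of that workflow with the libtorch API documented above (illustrative only, not part of the diff):

#include <torch/torch.h>
#include <iostream>

int main() {
  // Leaf tensor tracked by autograd.
  auto x = torch::rand({3}, torch::requires_grad());

  // Non-leaf intermediate: call retain_grad() so grad() gets populated for it too.
  auto y = x * 2;
  y.retain_grad();

  // y is non-scalar, so backward() needs an explicit gradient of matching shape.
  y.backward(torch::ones_like(y));

  std::cout << x.grad() << std::endl;  // gradient accumulated into the leaf
  std::cout << y.grad() << std::endl;  // defined because retain_grad() was called

  // Gradients accumulate across backward() calls; zero them before the next pass.
  x.grad().zero_();
  return 0;
}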
@ -69,12 +69,6 @@
|
||||
# define TH_UNUSED
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define __ubsan_ignore_float_divide_by_zero__ __attribute__((no_sanitize("float-divide-by-zero")))
|
||||
#else
|
||||
#define __ubsan_ignore_float_divide_by_zero__
|
||||
#endif
|
||||
|
||||
#ifndef M_PI
|
||||
# define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
@ -9,7 +9,7 @@ set(extra_src)
|
||||
# loop over all types
|
||||
foreach(THC_TYPE Byte Char Short Int Long Half Float Double)
|
||||
# loop over files which need to be split between types (because of long compile times)
|
||||
foreach(THC_FILE TensorSort TensorMathPointwise TensorMathReduce TensorMasked)
|
||||
foreach(THC_FILE TensorSort TensorMathPointwise TensorMathReduce TensorMasked TensorTopK)
|
||||
if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/generated/THC${THC_FILE}${THC_TYPE}.cu")
|
||||
FILE(WRITE "${CMAKE_CURRENT_SOURCE_DIR}/generated/THC${THC_FILE}${THC_TYPE}.cu"
|
||||
"#include <THC/THC${THC_FILE}.cuh>\n#include <THC/THCTensor.hpp>\n\n#include <THC/generic/THC${THC_FILE}.cu>\n#include <THC/THCGenerate${THC_TYPE}Type.h>\n")
|
||||
@ -56,7 +56,6 @@ set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorIndex.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorRandom.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorScatterGather.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorTopK.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorSort.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCSortUtils.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/THCTensorMode.cu
|
||||
|
@ -1,19 +0,0 @@
|
||||
#include <THC/THC.h>
|
||||
#include <THC/THCReduceApplyUtils.cuh>
|
||||
#include <THC/THCTensorCopy.h>
|
||||
#include <THC/THCTensorMath.h>
|
||||
#include <THC/THCAsmUtils.cuh>
|
||||
#include <THC/THCScanUtils.cuh>
|
||||
#include <THC/THCTensorTypeUtils.cuh>
|
||||
#include <THC/THCTensorMathReduce.cuh>
|
||||
#include <ATen/WrapDimUtils.h>
|
||||
#include <algorithm> // for std::min
|
||||
|
||||
#if CUDA_VERSION >= 7000 || defined __HIP_PLATFORM_HCC__
|
||||
#include <thrust/system/cuda/execution_policy.h>
|
||||
#endif
|
||||
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateAllTypes.h>
|
@ -1,6 +1,21 @@
|
||||
#ifndef THC_TENSOR_TOPK_CUH
|
||||
#define THC_TENSOR_TOPK_CUH
|
||||
|
||||
#include <THC/THC.h>
|
||||
#include <THC/THCReduceApplyUtils.cuh>
|
||||
#include <THC/THCTensorCopy.h>
|
||||
#include <THC/THCTensorMath.h>
|
||||
#include <THC/THCAsmUtils.cuh>
|
||||
#include <THC/THCScanUtils.cuh>
|
||||
#include <THC/THCTensorTypeUtils.cuh>
|
||||
#include <THC/THCTensorMathReduce.cuh>
|
||||
#include <ATen/WrapDimUtils.h>
|
||||
#include <algorithm> // for std::min
|
||||
|
||||
#if CUDA_VERSION >= 7000 || defined __HIP_PLATFORM_HCC__
|
||||
#include <thrust/system/cuda/execution_policy.h>
|
||||
#endif
|
||||
|
||||
#include <c10/macros/Macros.h>
|
||||
#include <ATen/native/cuda/SortingRadixSelect.cuh>
|
||||
|
||||
@ -52,6 +67,7 @@ __global__ void gatherTopK(TensorInfo<T, IndexType> input,
|
||||
inputSliceStart, outputSliceSize,
|
||||
inputSliceSize, inputWithinSliceStride,
|
||||
smem, &topKValue);
|
||||
const auto topKConverted = at::native::TopKTypeConfig<T>::convert(topKValue);
|
||||
|
||||
// Every value that is strictly less/greater than `pattern`
|
||||
// (depending on sort dir) in sorted int format is in the top-K.
|
||||
@ -74,11 +90,12 @@ __global__ void gatherTopK(TensorInfo<T, IndexType> input,
|
||||
bool inRange = (i < inputSliceSize);
|
||||
T v =
|
||||
inRange ? doLdg(&inputSliceStart[i * inputWithinSliceStride]) : ScalarConvert<int, T>::to(0);
|
||||
const auto convertedV = at::native::TopKTypeConfig<T>::convert(v);
|
||||
bool hasTopK;
|
||||
if (Order) {
|
||||
hasTopK = inRange && (THCNumerics<T>::gt(v, topKValue));
|
||||
hasTopK = inRange && (convertedV > topKConverted);
|
||||
} else {
|
||||
hasTopK = inRange && (THCNumerics<T>::lt(v, topKValue));
|
||||
hasTopK = inRange && (convertedV < topKConverted);
|
||||
}
|
||||
|
||||
int index;
|
||||
@ -111,7 +128,8 @@ __global__ void gatherTopK(TensorInfo<T, IndexType> input,
|
||||
bool inRange = (i < inputSliceSize);
|
||||
T v =
|
||||
inRange ? doLdg(&inputSliceStart[i * inputWithinSliceStride]) : ScalarConvert<int, T>::to(0);
|
||||
bool hasTopK = inRange && (THCNumerics<T>::eq(v, topKValue));
|
||||
const auto convertedV = at::native::TopKTypeConfig<T>::convert(v);
|
||||
bool hasTopK = inRange && (convertedV == topKConverted);
|
||||
|
||||
int index;
|
||||
int carry;
|
||||
|
5
aten/src/THC/generated/THCTensorTopKByte.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateByteType.h>
|
5
aten/src/THC/generated/THCTensorTopKChar.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateCharType.h>
|
5
aten/src/THC/generated/THCTensorTopKDouble.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateDoubleType.h>
|
5
aten/src/THC/generated/THCTensorTopKFloat.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateFloatType.h>
|
5
aten/src/THC/generated/THCTensorTopKHalf.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateHalfType.h>
|
5
aten/src/THC/generated/THCTensorTopKInt.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateIntType.h>
|
5
aten/src/THC/generated/THCTensorTopKLong.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateLongType.h>
|
5
aten/src/THC/generated/THCTensorTopKShort.cu
Normal file
@ -0,0 +1,5 @@
|
||||
#include <THC/THCTensorTopK.cuh>
|
||||
#include <THC/THCTensor.hpp>
|
||||
|
||||
#include <THC/generic/THCTensorTopK.cu>
|
||||
#include <THC/THCGenerateShortType.h>
|
@ -23,6 +23,14 @@
|
||||
|
||||
#include "c10/macros/Export.h"
|
||||
|
||||
#if defined(__clang__)
|
||||
#define __ubsan_ignore_float_divide_by_zero__ __attribute__((no_sanitize("float-divide-by-zero")))
|
||||
#define __ubsan_ignore_float_cast_overflow__ __attribute__((no_sanitize("float-cast-overflow")))
|
||||
#else
|
||||
#define __ubsan_ignore_float_divide_by_zero__
|
||||
#define __ubsan_ignore_float_cast_overflow__
|
||||
#endif
|
||||
|
||||
// Disable the copy and assignment operator for a class. Note that this will
|
||||
// disable the usage of the class in std containers.
|
||||
#define C10_DISABLE_COPY_AND_ASSIGN(classname) \
|
||||
|
@ -66,24 +66,44 @@ void Error::AppendMessage(const std::string& new_msg) {
|
||||
namespace Warning {
|
||||
|
||||
namespace {
|
||||
WarningHandler* getHandler() {
|
||||
WarningHandler* getBaseHandler() {
|
||||
static WarningHandler base_warning_handler_ = WarningHandler();
|
||||
return &base_warning_handler_;
|
||||
};
|
||||
static thread_local WarningHandler* warning_handler_ = getHandler();
|
||||
|
||||
class ThreadWarningHandler {
|
||||
public:
|
||||
ThreadWarningHandler() = delete;
|
||||
|
||||
static WarningHandler* get_handler() {
|
||||
if (!warning_handler_) {
|
||||
warning_handler_ = getBaseHandler();
|
||||
}
|
||||
return warning_handler_;
|
||||
}
|
||||
|
||||
static void set_handler(WarningHandler* handler) {
|
||||
warning_handler_ = handler;
|
||||
}
|
||||
|
||||
private:
|
||||
static thread_local WarningHandler* warning_handler_;
|
||||
};
|
||||
|
||||
thread_local WarningHandler* ThreadWarningHandler::warning_handler_ = nullptr;
|
||||
|
||||
}
|
||||
|
||||
void warn(SourceLocation source_location, const std::string& msg) {
|
||||
warning_handler_->process(source_location, msg);
|
||||
ThreadWarningHandler::get_handler()->process(source_location, msg);
|
||||
}
|
||||
|
||||
void set_warning_handler(WarningHandler* handler) noexcept(true) {
|
||||
warning_handler_ = handler;
|
||||
ThreadWarningHandler::set_handler(handler);
|
||||
}
|
||||
|
||||
WarningHandler* get_warning_handler() noexcept(true) {
|
||||
return warning_handler_;
|
||||
return ThreadWarningHandler::get_handler();
|
||||
}
|
||||
|
||||
} // namespace Warning
|
||||
|
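The Warning hunk above makes the handler pointer thread-local while keeping `set_warning_handler` / `get_warning_handler` unchanged. As a rough, hypothetical sketch of how a custom handler plugs into this API (the exact virtual signature of `WarningHandler::process` is assumed here from the `process(source_location, msg)` call site above):

#include <c10/util/Exception.h>
#include <iostream>

// Hypothetical handler that counts warnings instead of only printing them.
struct CountingWarningHandler : public c10::WarningHandler {
  void process(const c10::SourceLocation& source_location,
               const std::string& msg) override {
    ++count;
    std::cerr << "warning #" << count << " at " << source_location.file
              << ": " << msg << std::endl;
  }
  int count = 0;
};

void run_with_counting_warnings() {
  CountingWarningHandler handler;
  // With the change above, installing a handler only affects the current thread.
  c10::Warning::set_warning_handler(&handler);
  // ... code that may call TORCH_WARN(...) runs here ...
  // Per get_handler() above, resetting to nullptr falls back to the base handler.
  c10::Warning::set_warning_handler(nullptr);
}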
@ -67,7 +67,7 @@ struct maybe_real<true, src_t> {
|
||||
|
||||
template <typename dest_t, typename src_t>
|
||||
struct static_cast_with_inter_type {
|
||||
C10_HOST_DEVICE static inline dest_t apply(src_t src) {
|
||||
C10_HOST_DEVICE __ubsan_ignore_float_cast_overflow__ static inline dest_t apply(src_t src) {
|
||||
constexpr bool real = needs_real<dest_t, src_t>::value;
|
||||
return static_cast<dest_t>(
|
||||
static_cast<inter_copy_type_t<dest_t>>(maybe_real<real, src_t>::apply(src)));
|
||||
|
@ -748,7 +748,7 @@ if (NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
|
||||
target_include_directories(torch_cuda PUBLIC "${NVTOOLEXT_HOME}/include")
|
||||
# -INCLUDE is used to ensure torch_cuda is linked against in a project that relies on it.
|
||||
# Related issue: https://github.com/pytorch/pytorch/issues/31611
|
||||
target_link_libraries(torch_cuda INTERFACE "-INCLUDE:\"?warp_size@cuda@at@@YAHXZ\"")
|
||||
target_link_libraries(torch_cuda INTERFACE "-INCLUDE:?warp_size@cuda@at@@YAHXZ")
|
||||
|
||||
elseif(APPLE)
|
||||
set(TORCH_CUDA_LIBRARIES
|
||||
@ -949,6 +949,31 @@ if (USE_OPENMP AND OPENMP_FOUND)
|
||||
target_link_libraries(torch_cpu PRIVATE ${OpenMP_CXX_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if ($ENV{TH_BINARY_BUILD})
|
||||
if (NOT MSVC AND USE_CUDA AND NOT APPLE)
|
||||
# Note [Extra MKL symbols for MAGMA in torch_cpu]
|
||||
#
|
||||
# When we build CUDA libraries and link against MAGMA, MAGMA makes use of
|
||||
# some BLAS symbols in its CPU fallbacks when it has no GPU versions
|
||||
# of kernels. Previously, we ensured the BLAS symbols were filled in by
|
||||
# MKL by linking torch_cuda with BLAS, but when we are statically linking
|
||||
# against MKL (when we do wheel builds), this actually ends up pulling in a
|
||||
# decent chunk of MKL into torch_cuda, inflating our torch_cuda binary
|
||||
# size by 8M. torch_cpu exposes most of the MKL symbols we need, but
|
||||
# empirically we determined that there are four which it doesn't provide. If
|
||||
# we link torch_cpu with these --undefined symbols, we can ensure they
|
||||
# do get pulled in, and then we can avoid statically linking in MKL to
|
||||
# torch_cuda at all!
|
||||
#
|
||||
# We aren't really optimizing for binary size on Windows (and this link
|
||||
# line doesn't work on Windows), so don't do it there.
|
||||
#
|
||||
# These linker commands do not work on OS X, do not attempt this there.
|
||||
# (It shouldn't matter anyway, though, because OS X has dropped CUDA support)
|
||||
set_target_properties(torch_cpu PROPERTIES LINK_FLAGS "-Wl,--undefined=mkl_lapack_slaed0 -Wl,--undefined=mkl_lapack_dlaed0 -Wl,--undefined=mkl_lapack_dormql -Wl,--undefined=mkl_lapack_sormql")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
target_link_libraries(torch_cpu PUBLIC c10)
|
||||
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
|
||||
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include "caffe2/operators/fused_rowwise_nbitfake_conversion_ops.h"
|
||||
#include <fp16.h>
|
||||
#ifdef __AVX__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#include "c10/util/Registry.h"
|
||||
|
||||
namespace caffe2 {
|
||||
|
@ -50,8 +50,13 @@ __global__ void ReluCUDAKernel<half2>(const int N, const half2* X, half2* Y) {
|
||||
Y[i] = __hmul2(__hgt2(__ldg(X + i), kZero), __ldg(X + i));
|
||||
#else
|
||||
const float2 xx = __half22float2(X[i]);
|
||||
Y[i] =
|
||||
__floats2half2_rn(xx.x > 0.0f ? xx.x : 0.0f, xx.y > 0.0f ? xx.y : 0.0f);
|
||||
// There are explicit casts to float here, because the expression may otherwise be ambiguous on ROCm, and this can be triggered
|
||||
// sometimes:
|
||||
//
|
||||
// error: conditional expression is ambiguous; 'const hip_impl::Scalar_accessor<float, Native_vec_, 0>' can be
|
||||
// converted to 'float' and vice versa
|
||||
Y[i] = __floats2half2_rn(xx.x > 0.0f ? static_cast<float>(xx.x) : 0.0f,
|
||||
xx.y > 0.0f ? static_cast<float>(xx.y) : 0.0f);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -100,8 +105,14 @@ __global__ void ReluGradientCUDAKernel<half2>(
|
||||
#else
|
||||
const float2 dy = __half22float2(dY[i]);
|
||||
const float2 yy = __half22float2(Y[i]);
|
||||
dX[i] =
|
||||
__floats2half2_rn(yy.x > 0.0f ? dy.x : 0.0f, yy.y > 0.0f ? dy.y : 0.0f);
|
||||
// There are explicit casts to float here, because the expression may otherwise be ambiguous on ROCm, and this can be triggered
|
||||
// sometimes:
|
||||
//
|
||||
// error: conditional expression is ambiguous; 'const hip_impl::Scalar_accessor<float, Native_vec_, 1>' can be
|
||||
// converted to 'float' and vice versa
|
||||
|
||||
dX[i] = __floats2half2_rn(yy.x > 0.0f ? static_cast<float>(dy.x) : 0.0f,
|
||||
yy.y > 0.0f ? static_cast<float>(dy.y) : 0.0f);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
40
cmake/External/nccl.cmake
vendored
@ -15,6 +15,7 @@ if (NOT __NCCL_INCLUDED)
|
||||
# this second replacement is needed when there are multiple archs
|
||||
string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")
|
||||
|
||||
set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
|
||||
ExternalProject_Add(nccl_external
|
||||
SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl/nccl
|
||||
BUILD_IN_SOURCE 1
|
||||
@ -30,20 +31,49 @@ if (NOT __NCCL_INCLUDED)
|
||||
"CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
|
||||
"NVCC=${CUDA_NVCC_EXECUTABLE}"
|
||||
"NVCC_GENCODE=${NVCC_GENCODE}"
|
||||
"BUILDDIR=${CMAKE_CURRENT_BINARY_DIR}/nccl"
|
||||
"BUILDDIR=${__NCCL_BUILD_DIR}"
|
||||
"VERBOSE=0"
|
||||
"-j"
|
||||
BUILD_BYPRODUCTS "${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a"
|
||||
BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
|
||||
INSTALL_COMMAND ""
|
||||
)
|
||||
|
||||
# Detect objcopy version
|
||||
execute_process (COMMAND "${CMAKE_OBJCOPY}" "--version" OUTPUT_VARIABLE OBJCOPY_VERSION_STR)
|
||||
string(REGEX REPLACE "GNU objcopy version ([0-9])\\.([0-9]+).*" "\\1" OBJCOPY_VERSION_MAJOR ${OBJCOPY_VERSION_STR})
|
||||
string(REGEX REPLACE "GNU objcopy version ([0-9])\\.([0-9]+).*" "\\2" OBJCOPY_VERSION_MINOR ${OBJCOPY_VERSION_STR})
|
||||
|
||||
if ((${OBJCOPY_VERSION_MAJOR} GREATER 2) OR ((${OBJCOPY_VERSION_MAJOR} EQUAL 2) AND (${OBJCOPY_VERSION_MINOR} GREATER 27)))
|
||||
message(WARNING "Enabling NCCL library slimming")
|
||||
add_custom_command(
|
||||
OUTPUT "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a"
|
||||
DEPENDS nccl_external
|
||||
COMMAND "${CMAKE_COMMAND}" -E make_directory "${__NCCL_BUILD_DIR}/objects"
|
||||
COMMAND cd objects
|
||||
COMMAND "${CMAKE_AR}" x "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
|
||||
COMMAND for obj in all_gather_* all_reduce_* broadcast_* reduce_*.o$<SEMICOLON> do "${CMAKE_OBJCOPY}" --remove-relocations .nvFatBinSegment --remove-section __nv_relfatbin $$obj$<SEMICOLON> done
|
||||
COMMAND "${CMAKE_AR}" cr "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a" "*.o"
|
||||
COMMAND cd -
|
||||
COMMAND "${CMAKE_COMMAND}" -E remove_directory "${__NCCL_BUILD_DIR}/objects"
|
||||
WORKING_DIRECTORY "${__NCCL_BUILD_DIR}"
|
||||
COMMENT "Slimming NCCL"
|
||||
)
|
||||
add_custom_target(nccl_slim_external DEPENDS "${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a")
|
||||
set(__NCCL_LIBRARY_DEP nccl_slim_external)
|
||||
set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_slim_static.a)
|
||||
else()
|
||||
message(WARNING "Objcopy version is too old to support NCCL library slimming")
|
||||
set(__NCCL_LIBRARY_DEP nccl_external)
|
||||
set(NCCL_LIBRARIES ${__NCCL_BUILD_DIR}/lib/libnccl_static.a)
|
||||
endif()
|
||||
|
||||
|
||||
set(NCCL_FOUND TRUE)
|
||||
add_library(__caffe2_nccl INTERFACE)
|
||||
# The following old-style variables are set so that other libs, such as Gloo,
|
||||
# can still use it.
|
||||
set(NCCL_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/nccl/include)
|
||||
set(NCCL_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/nccl/lib/libnccl_static.a)
|
||||
add_dependencies(__caffe2_nccl nccl_external)
|
||||
set(NCCL_INCLUDE_DIRS ${__NCCL_BUILD_DIR}/include)
|
||||
add_dependencies(__caffe2_nccl ${__NCCL_LIBRARY_DEP})
|
||||
target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
|
||||
target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
@ -56,6 +56,10 @@ INPUT = ../../../aten/src/ATen/ATen.h \
|
||||
../../../c10/cuda/CUDAStream.h \
|
||||
../../../torch/csrc/api/include \
|
||||
../../../torch/csrc/api/src \
|
||||
../../../torch/csrc/autograd/autograd.h \
|
||||
../../../torch/csrc/autograd/custom_function.h \
|
||||
../../../torch/csrc/autograd/function.h \
|
||||
../../../torch/csrc/autograd/variable.h \
|
||||
../../../torch/csrc/autograd/generated/variable_factories.h \
|
||||
../../../torch/csrc/jit/runtime/custom_operator.h \
|
||||
../../../torch/csrc/jit/serialization/import.h \
|
||||
|
@ -281,7 +281,9 @@ change one property, this is quite practical.
|
||||
In conclusion, we can now compare how ``TensorOptions`` defaults, together with
|
||||
the abbreviated API for creating ``TensorOptions`` using free functions, allow
|
||||
tensor creation in C++ with the same convenience as in Python. Compare this
|
||||
call in Python::
|
||||
call in Python:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
torch.randn(3, 4, dtype=torch.float32, device=torch.device('cuda', 1), requires_grad=True)
|
||||
|
||||
|
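The corresponding C++ call, using the free-function ``TensorOptions`` shorthands this note describes, would be along these lines (illustrative sketch; assumes a machine where CUDA device 1 exists):

#include <torch/torch.h>

auto tensor = torch::randn({3, 4},
    torch::dtype(torch::kFloat32)
        .device(torch::kCUDA, 1)
        .requires_grad(true));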
99
docs/cpp/source/notes/tensor_indexing.rst
Normal file
@ -0,0 +1,99 @@
|
||||
Tensor Indexing API
|
||||
===================
|
||||
|
||||
Indexing a tensor in the PyTorch C++ API works very similarly to the Python API.
|
||||
All index types such as ``None`` / ``...`` / integer / boolean / slice / tensor
|
||||
are available in the C++ API, making translation from Python indexing code to C++
|
||||
very simple. The main difference is that, instead of using the ``[]``-operator
|
||||
similar to the Python API syntax, in the C++ API the indexing methods are:
|
||||
|
||||
- ``torch::Tensor::index`` (`link <https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4NK2at6Tensor5indexE8ArrayRefIN2at8indexing11TensorIndexEE>`_)
|
||||
- ``torch::Tensor::index_put_`` (`link <https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4N2at6Tensor10index_put_E8ArrayRefIN2at8indexing11TensorIndexEERK6Tensor>`_)
|
||||
|
||||
It's also important to note that index types such as ``None`` / ``Ellipsis`` / ``Slice``
|
||||
live in the ``torch::indexing`` namespace, and it's recommended to put ``using namespace torch::indexing``
|
||||
before any indexing code for convenient use of those index types.
|
||||
|
||||
Here are some examples of translating Python indexing code to C++:
|
||||
|
||||
Getter
|
||||
------
|
||||
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| Python | C++ (assuming ``using namespace torch::indexing``) |
|
||||
+==========================================================+======================================================================================+
|
||||
| ``tensor[None]`` | ``tensor.index({None})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[Ellipsis, ...]`` | ``tensor.index({Ellipsis, "..."})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[1, 2]`` | ``tensor.index({1, 2})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[True, False]`` | ``tensor.index({true, false})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[1::2]`` | ``tensor.index({Slice(1, None, 2)})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[torch.tensor([1, 2])]`` | ``tensor.index({torch::tensor({1, 2})})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[..., 0, True, 1::2, torch.tensor([1, 2])]`` | ``tensor.index({"...", 0, true, Slice(1, None, 2), torch::tensor({1, 2})})`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
|
||||
Setter
|
||||
------
|
||||
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| Python | C++ (assuming ``using namespace torch::indexing``) |
|
||||
+==========================================================+======================================================================================+
|
||||
| ``tensor[None] = 1`` | ``tensor.index_put_({None}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[Ellipsis, ...] = 1`` | ``tensor.index_put_({Ellipsis, "..."}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[1, 2] = 1`` | ``tensor.index_put_({1, 2}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[True, False] = 1`` | ``tensor.index_put_({true, false}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[1::2] = 1`` | ``tensor.index_put_({Slice(1, None, 2)}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[torch.tensor([1, 2])] = 1`` | ``tensor.index_put_({torch::tensor({1, 2})}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
| ``tensor[..., 0, True, 1::2, torch.tensor([1, 2])] = 1`` | ``tensor.index_put_({"...", 0, true, Slice(1, None, 2), torch::tensor({1, 2})}, 1)`` |
|
||||
+----------------------------------------------------------+--------------------------------------------------------------------------------------+
|
||||
|
||||
|
||||
Translating between Python/C++ index types
|
||||
------------------------------------------
|
||||
|
||||
The one-to-one translation between Python and C++ index types is as follows:
|
||||
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| Python | C++ (assuming ``using namespace torch::indexing``) |
|
||||
+=========================+========================================================================+
|
||||
| ``None`` | ``None`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``Ellipsis`` | ``Ellipsis`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``...`` | ``"..."`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``123`` | ``123`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``True`` | ``true`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``False`` | ``false`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``:`` or ``::`` | ``Slice()`` or ``Slice(None, None)`` or ``Slice(None, None, None)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``1:`` or ``1::`` | ``Slice(1, None)`` or ``Slice(1, None, None)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``:3`` or ``:3:`` | ``Slice(None, 3)`` or ``Slice(None, 3, None)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``::2`` | ``Slice(None, None, 2)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``1:3`` | ``Slice(1, 3)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``1::2`` | ``Slice(1, None, 2)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``:3:2`` | ``Slice(None, 3, 2)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``1:3:2`` | ``Slice(1, 3, 2)`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
||||
| ``torch.tensor([1, 2])``| ``torch::tensor({1, 2})`` |
|
||||
+-------------------------+------------------------------------------------------------------------+
|
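Putting the tables above together, a short illustrative sketch of a getter and a setter in C++:

#include <torch/torch.h>

using namespace torch::indexing;

void indexing_example() {
  auto t = torch::arange(16).reshape({4, 4});

  // Python: t[1:3, ::2]
  auto sub = t.index({Slice(1, 3), Slice(None, None, 2)});

  // Python: t[..., 0] = -1
  t.index_put_({"...", 0}, -1);
}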
@ -1,4 +1,4 @@
|
||||
sphinx
|
||||
sphinx==2.4.4
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
|
||||
sphinxcontrib.katex
|
||||
matplotlib
|
||||
|
@ -13,6 +13,13 @@ use ``torch.float16`` (``half``). Some operations, like linear layers and convol
|
||||
are much faster in ``float16``. Other operations, like reductions, often require the dynamic
|
||||
range of ``float32``. Networks running in mixed precision try to match each operation to its appropriate datatype.
|
||||
|
||||
.. warning::
|
||||
:class:`torch.cuda.amp.GradScaler` is not a complete implementation of automatic mixed precision.
|
||||
:class:`GradScaler` is only useful if you manually run regions of your model in ``float16``.
|
||||
If you aren't sure how to choose op precision manually, the master branch and nightly pip/conda
|
||||
builds include a context manager that chooses op precision automatically wherever it's enabled.
|
||||
See the `master documentation <https://pytorch.org/docs/master/amp.html>`_ for details.
|
||||
|
||||
.. contents:: :local:
|
||||
|
||||
.. _gradient-scaling:
|
||||
|
@ -395,6 +395,8 @@ of 16
|
||||
.. autofunction:: all_gather_multigpu
|
||||
|
||||
|
||||
.. _distributed-launch:
|
||||
|
||||
Launch utility
|
||||
--------------
|
||||
|
||||
|
@ -16,7 +16,6 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
|
||||
:caption: Notes
|
||||
|
||||
notes/*
|
||||
PyTorch on XLA Devices <http://pytorch.org/xla/>
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
@ -46,7 +45,7 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
|
||||
onnx
|
||||
optim
|
||||
quantization
|
||||
rpc
|
||||
rpc/index.rst
|
||||
torch.random <random>
|
||||
sparse
|
||||
storage
|
||||
@ -62,24 +61,15 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
|
||||
name_inference
|
||||
torch.__config__ <__config__>
|
||||
|
||||
.. toctree::
|
||||
:glob:
|
||||
:maxdepth: 2
|
||||
:caption: torchvision Reference
|
||||
|
||||
torchvision/index
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: torchaudio Reference
|
||||
|
||||
:caption: Libraries
|
||||
|
||||
PyTorch on XLA Devices <http://pytorch.org/xla/>
|
||||
PyTorch Elastic (torchelastic) <https://pytorch.org/elastic/>
|
||||
torchaudio <https://pytorch.org/audio>
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: torchtext Reference
|
||||
|
||||
torchtext <https://pytorch.org/text>
|
||||
torchvision/index
|
||||
|
||||
.. toctree::
|
||||
:glob:
|
||||
|
@ -5,6 +5,13 @@ Automatic Mixed Precision examples
|
||||
|
||||
.. currentmodule:: torch.cuda.amp
|
||||
|
||||
.. warning::
|
||||
:class:`torch.cuda.amp.GradScaler` is not a complete implementation of automatic mixed precision.
|
||||
:class:`GradScaler` is only useful if you manually run regions of your model in ``float16``.
|
||||
If you aren't sure how to choose op precision manually, the master branch and nightly pip/conda
|
||||
builds include a context manager that chooses op precision automatically wherever it's enabled.
|
||||
See the `master documentation <https://pytorch.org/docs/master/amp.html>`_ for details.
|
||||
|
||||
.. contents:: :local:
|
||||
|
||||
.. _gradient-scaling-examples:
|
||||
|
@ -306,20 +306,30 @@ to overlap data transfers with computation.
|
||||
You can make the :class:`~torch.utils.data.DataLoader` return batches placed in
|
||||
pinned memory by passing ``pin_memory=True`` to its constructor.
|
||||
|
||||
.. _cuda-nn-dataparallel-instead:
|
||||
.. _cuda-nn-ddp-instead:
|
||||
|
||||
Use nn.DataParallel instead of multiprocessing
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Use nn.parallel.DistributedDataParallel instead of multiprocessing or nn.DataParallel
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Most use cases involving batched inputs and multiple GPUs should default to
|
||||
using :class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with
|
||||
the GIL, a single Python process can saturate multiple GPUs.
|
||||
|
||||
As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized.
|
||||
However, this is a known issue that is under active development. As always,
|
||||
test your use case.
|
||||
using :class:`~torch.nn.parallel.DistributedDataParallel` to utilize more
|
||||
than one GPU.
|
||||
|
||||
There are significant caveats to using CUDA models with
|
||||
:mod:`~torch.multiprocessing`; unless care is taken to meet the data handling
|
||||
requirements exactly, it is likely that your program will have incorrect or
|
||||
undefined behavior.
|
||||
|
||||
It is recommended to use :class:`~torch.nn.parallel.DistributedDataParallel`,
|
||||
instead of :class:`~torch.nn.DataParallel` to do multi-GPU training, even if
|
||||
there is only a single node.
|
||||
|
||||
The difference between :class:`~torch.nn.parallel.DistributedDataParallel` and
|
||||
:class:`~torch.nn.DataParallel` is: :class:`~torch.nn.parallel.DistributedDataParallel`
|
||||
uses multiprocessing where a process is created for each GPU, while
|
||||
:class:`~torch.nn.DataParallel` uses multithreading. By using multiprocessing,
|
||||
each GPU has its own dedicated process, which avoids the performance overhead caused
by the GIL of the Python interpreter.
|
||||
|
||||
If you use :class:`~torch.nn.parallel.DistributedDataParallel`, you can use the
`torch.distributed.launch` utility to launch your program; see :ref:`distributed-launch`.
|
||||
|
@ -45,7 +45,7 @@ the consumer process has references to the tensor, and the refcounting can not
|
||||
save you if the consumer process exits abnormally via a fatal signal. See
|
||||
:ref:`this section <multiprocessing-cuda-sharing-details>`.
|
||||
|
||||
See also: :ref:`cuda-nn-dataparallel-instead`
|
||||
See also: :ref:`cuda-nn-ddp-instead`
|
||||
|
||||
|
||||
Best practices and tips
|
||||
|
24
docs/source/rpc/index.rst
Normal file
@ -0,0 +1,24 @@
|
||||
.. _rpc-index:
|
||||
|
||||
Distributed RPC Framework
|
||||
==============================
|
||||
|
||||
The distributed RPC framework provides mechanisms for multi-machine model training through a set of primitives to allow for remote communication, and a higher-level API to automatically differentiate models split across several machines.
|
||||
|
||||
- :ref:`distributed-rpc-framework`
|
||||
|
||||
Design Notes
|
||||
------------
|
||||
The distributed autograd design note covers the design of the RPC-based distributed autograd framework that is useful for applications such as model parallel training.
|
||||
|
||||
- :ref:`distributed-autograd-design`
|
||||
|
||||
The RRef design note covers the design of the :ref:`rref` (Remote REFerence) protocol used to refer to values on remote workers by the framework.
|
||||
|
||||
- :ref:`remote-reference-protocol`
|
||||
|
||||
Tutorials
|
||||
---------
|
||||
The RPC tutorial introduces users to the RPC framework and provides two example applications using :ref:`torch.distributed.rpc<distributed-rpc-framework>` APIs.
|
||||
|
||||
- `Getting started with Distributed RPC Framework <https://pytorch.org/tutorials/intermediate/rpc_tutorial.html>`__
|
@ -8,6 +8,8 @@ training through a set of primitives to allow for remote communication, and a
|
||||
higher-level API to automatically differentiate models split across several
|
||||
machines.
|
||||
|
||||
.. warning::
|
||||
APIs in the RPC package are stable. There are multiple ongoing work items to improve performance and error handling, which will ship in future releases.
|
||||
|
||||
|
||||
Basics
|
@ -210,3 +210,25 @@ Example::
|
||||
(1, 5)
|
||||
|
||||
For more information on ``torch.sparse_coo`` tensors, see :ref:`sparse-docs`.
|
||||
|
||||
torch.memory_format
|
||||
-------------------
|
||||
|
||||
.. class:: torch.memory_format
|
||||
|
||||
A :class:`torch.memory_format` is an object representing the memory format on which a :class:`torch.Tensor` is
|
||||
or will be allocated.
|
||||
|
||||
Possible values are:
|
||||
|
||||
- ``torch.contiguous_format``:
|
||||
Tensor is or will be allocated in dense non-overlapping memory. Strides are represented by values in decreasing order.
|
||||
|
||||
- ``torch.channels_last``:
|
||||
Tensor is or will be allocated in dense non-overlapping memory. Strides represented by values in
|
||||
``strides[0] > strides[2] > strides[3] > strides[1] == 1`` aka NHWC order.
|
||||
|
||||
- ``torch.preserve_format``:
|
||||
Used in functions like `clone` to preserve the memory format of the input tensor. If the input tensor is
allocated in dense non-overlapping memory, the output tensor strides will be copied from the input.
Otherwise, the output strides will follow ``torch.contiguous_format``.
|
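The stride orderings described above can be checked directly; for example with the C++ API, where the same formats are exposed as ``at::MemoryFormat`` (illustrative sketch, assuming a build with channels-last support):

#include <torch/torch.h>
#include <iostream>

void memory_format_example() {
  // NCHW tensor in the default contiguous format: strides decrease left to right.
  auto x = torch::rand({2, 3, 4, 5});
  std::cout << x.strides() << std::endl;  // [60, 20, 5, 1]

  // Same data laid out channels-last (NHWC): strides[1] == 1.
  auto y = x.contiguous(at::MemoryFormat::ChannelsLast);
  std::cout << y.is_contiguous(at::MemoryFormat::ChannelsLast) << std::endl;  // 1
  std::cout << y.strides() << std::endl;  // [60, 1, 15, 3]
}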
@ -49,8 +49,10 @@ For reference, here’s a full list of view ops in PyTorch:
|
||||
|
||||
- Basic slicing and indexing op, e.g. ``tensor[0, 2:, 1:7:2]`` returns a view of base ``tensor``, see note below.
|
||||
- :meth:`~torch.Tensor.as_strided`
|
||||
- :meth:`~torch.Tensor.detach`
|
||||
- :meth:`~torch.Tensor.diagonal`
|
||||
- :meth:`~torch.Tensor.expand`
|
||||
- :meth:`~torch.Tensor.expand_as`
|
||||
- :meth:`~torch.Tensor.narrow`
|
||||
- :meth:`~torch.Tensor.permute`
|
||||
- :meth:`~torch.Tensor.select`
|
||||
|
@ -296,7 +296,6 @@ view of a storage and defines numeric operations on it.
|
||||
.. automethod:: hardshrink
|
||||
.. automethod:: histc
|
||||
.. automethod:: ifft
|
||||
.. automethod:: imag
|
||||
.. automethod:: index_add_
|
||||
.. automethod:: index_add
|
||||
.. automethod:: index_copy_
|
||||
@ -413,7 +412,6 @@ view of a storage and defines numeric operations on it.
|
||||
:noindex:
|
||||
.. automethod:: remainder
|
||||
.. automethod:: remainder_
|
||||
.. automethod:: real
|
||||
.. automethod:: renorm
|
||||
.. automethod:: renorm_
|
||||
.. automethod:: repeat
|
||||
@ -495,6 +493,8 @@ view of a storage and defines numeric operations on it.
|
||||
.. automethod:: tril_
|
||||
.. automethod:: triu
|
||||
.. automethod:: triu_
|
||||
.. automethod:: true_divide
|
||||
.. automethod:: true_divide_
|
||||
.. automethod:: trunc
|
||||
.. automethod:: trunc_
|
||||
.. automethod:: type
|
||||
|
4
setup.py
@ -352,10 +352,10 @@ def build_deps():
|
||||
################################################################################
|
||||
|
||||
# the list of runtime dependencies required by this built package
|
||||
install_requires = []
|
||||
install_requires = ['future']
|
||||
|
||||
if sys.version_info <= (2, 7):
|
||||
install_requires += ['future', 'typing']
|
||||
install_requires += ['typing']
|
||||
|
||||
missing_pydep = '''
|
||||
Missing build dependency: Unable to `import {importname}`.
|
||||
|
@ -21,100 +21,109 @@ white_list = [
|
||||
# We export some functions and classes for test_jit.py directly from libtorch.so,
|
||||
# it's not important to have BC for them
|
||||
('_TorchScriptTesting.*', datetime.date(9999, 1, 1)),
|
||||
('aten::pop*', datetime.date(2020, 4, 1)),
|
||||
('aten::insert*', datetime.date(2020, 4, 1)),
|
||||
('aten::Delete*', datetime.date(2020, 4, 1)),
|
||||
('aten::clear*', datetime.date(2020, 4, 1)),
|
||||
('aten::_set_item*', datetime.date(2020, 4, 1)),
|
||||
('aten::copy*', datetime.date(2020, 4, 1)),
|
||||
('aten::extend*', datetime.date(2020, 4, 1)),
|
||||
('aten::reverse*', datetime.date(2020, 4, 1)),
|
||||
('aten::append*', datetime.date(2020, 4, 1)),
|
||||
('aten::list*', datetime.date(2020, 4, 1)),
|
||||
('aten::__getitem__*', datetime.date(2020, 4, 1)),
|
||||
('aten::len*', datetime.date(2020, 4, 1)),
|
||||
('aten::mul_*', datetime.date(2020, 4, 1)),
|
||||
('aten::slice*', datetime.date(2020, 4, 1)),
|
||||
('aten::add*', datetime.date(2020, 4, 1)),
|
||||
('aten::mul*', datetime.date(2020, 4, 1)),
|
||||
('aten::select*', datetime.date(2020, 4, 1)),
|
||||
('aten::add_*', datetime.date(2020, 4, 1)),
|
||||
# _like default change, see https://github.com/pytorch/pytorch/issues/33580
|
||||
('aten::randn_like', datetime.date(2020, 3, 15)),
|
||||
('aten::full_like', datetime.date(2020, 3, 15)),
|
||||
('aten::empty_like', datetime.date(2020, 3, 15)),
|
||||
('aten::rand_like', datetime.date(2020, 3, 15)),
|
||||
('aten::ones_like', datetime.date(2020, 3, 15)),
|
||||
('aten::randint_like', datetime.date(2020, 3, 15)),
|
||||
('aten::zeros_like', datetime.date(2020, 3, 15)),
|
||||
('aten::floor_divide', datetime.date(2020, 4, 1)),
|
||||
('aten::Bool', datetime.date(2020, 4, 1)),
|
||||
('aten::Float', datetime.date(2020, 4, 1)),
|
||||
('aten::to', datetime.date(2020, 4, 1)),
|
||||
('aten::backward', datetime.date(2020, 4, 1)),
|
||||
('aten::len', datetime.date(2020, 4, 1)),
|
||||
('aten::remove', datetime.date(2020, 4, 1)),
|
||||
('aten::index', datetime.date(2020, 4, 1)),
|
||||
('aten::count', datetime.date(2020, 4, 1)),
|
||||
('aten::__contains__', datetime.date(2020, 4, 1)),
|
||||
('aten::sort', datetime.date(2020, 4, 1)),
|
||||
('aten::sorted', datetime.date(2020, 4, 1)),
|
||||
('aten::eq', datetime.date(2020, 4, 1)),
|
||||
('aten::ne', datetime.date(2020, 4, 1)),
|
||||
('aten::lt', datetime.date(2020, 4, 1)),
|
||||
('aten::gt', datetime.date(2020, 4, 1)),
|
||||
('aten::le', datetime.date(2020, 4, 1)),
|
||||
('aten::ge', datetime.date(2020, 4, 1)),
|
||||
('aten::divmod', datetime.date(2020, 4, 1)),
|
||||
('aten::__upsample_bilinear', datetime.date(2020, 4, 1)),
|
||||
('aten::__upsample', datetime.date(2020, 4, 1)),
|
||||
('aten::__upsample_nearest', datetime.date(2020, 4, 1)),
|
||||
('aten::__interpolate', datetime.date(2020, 4, 1)),
|
||||
('aten::fabs', datetime.date(2020, 4, 1)),
|
||||
('aten::gamma', datetime.date(2020, 4, 1)),
|
||||
('prim::abs', datetime.date(2020, 4, 1)),
|
||||
('aten::factorial', datetime.date(2020, 4, 1)),
|
||||
('aten::radians', datetime.date(2020, 4, 1)),
|
||||
('aten::degrees', datetime.date(2020, 4, 1)),
|
||||
('prim::acosh', datetime.date(2020, 4, 1)),
|
||||
('prim::atanh', datetime.date(2020, 4, 1)),
|
||||
('aten::asinh', datetime.date(2020, 4, 1)),
|
||||
('aten::floordiv', datetime.date(2020, 4, 1)),
|
||||
('prim::NumToTensor', datetime.date(2020, 4, 1)),
|
||||
('aten::sin', datetime.date(2020, 4, 1)),
|
||||
('aten::round', datetime.date(2020, 4, 1)),
|
||||
('aten::remainder', datetime.date(2020, 4, 1)),
|
||||
('aten::isfinite', datetime.date(2020, 4, 1)),
|
||||
('aten::sub', datetime.date(2020, 4, 1)),
|
||||
('aten::sqrt', datetime.date(2020, 4, 1)),
|
||||
('aten::log1p', datetime.date(2020, 4, 1)),
|
||||
('aten::acos', datetime.date(2020, 4, 1)),
|
||||
('aten::floor', datetime.date(2020, 4, 1)),
|
||||
('aten::exp', datetime.date(2020, 4, 1)),
|
||||
('aten::tan', datetime.date(2020, 4, 1)),
|
||||
('aten::sinh', datetime.date(2020, 4, 1)),
|
||||
('aten::ceil', datetime.date(2020, 4, 1)),
|
||||
('aten::atan', datetime.date(2020, 4, 1)),
|
||||
('aten::erf', datetime.date(2020, 4, 1)),
|
||||
('aten::erfc', datetime.date(2020, 4, 1)),
|
||||
('aten::cosh', datetime.date(2020, 4, 1)),
|
||||
('aten::expm1', datetime.date(2020, 4, 1)),
|
||||
('aten::isinf', datetime.date(2020, 4, 1)),
|
||||
('aten::lgamma', datetime.date(2020, 4, 1)),
|
||||
('aten::asin', datetime.date(2020, 4, 1)),
|
||||
('aten::log', datetime.date(2020, 4, 1)),
|
||||
('aten::log10', datetime.date(2020, 4, 1)),
|
||||
('aten::cos', datetime.date(2020, 4, 1)),
|
||||
('aten::tanh', datetime.date(2020, 4, 1)),
|
||||
('prim::min', datetime.date(2020, 4, 1)),
|
||||
('prim::max', datetime.date(2020, 4, 1)),
|
||||
('aten::_linear_packed', datetime.date(2020, 4, 1)),
|
||||
('aten::_linear_prepack', datetime.date(2020, 4, 1)),
|
||||
('aten::_conv2d_packed', datetime.date(2020, 4, 1)),
|
||||
('aten::_conv2d_prepack', datetime.date(2020, 4, 1)),
|
||||
('aten::confirmed_by_owner', datetime.date(2020, 3, 17)),
|
||||
('aten::owner', datetime.date(2020, 3, 27)),
|
||||
('aten::owner_name', datetime.date(2020, 3, 27)),
|
||||
('_caffe2', datetime.date(9999, 1, 1)),
|
||||
('_aten', datetime.date(9999, 1, 1)),
|
||||
('prim::', datetime.date(9999, 1, 1)),
|
||||
('onnx::', datetime.date(9999, 1, 1)),
|
||||
('aten::_set_item', datetime.date(9999, 1, 1)),
|
||||
('aten::setdefault', datetime.date(9999, 1, 1)),
|
||||
('aten::_test_optional_float', datetime.date(9999, 1, 1)),
|
||||
('aten::__upsample', datetime.date(9999, 1, 1)),
|
||||
('aten::__interpolate', datetime.date(9999, 1, 1)),
|
||||
('aten::divmod', datetime.date(9999, 1, 1)),
|
||||
('aten::fabs', datetime.date(9999, 1, 1)),
|
||||
('aten::gamma', datetime.date(9999, 1, 1)),
|
||||
('aten::abs', datetime.date(9999, 1, 1)),
|
||||
('aten::isinf', datetime.date(9999, 1, 1)),
|
||||
('aten::factorial', datetime.date(9999, 1, 1)),
|
||||
('aten::radians', datetime.date(9999, 1, 1)),
|
||||
('aten::degrees', datetime.date(9999, 1, 1)),
|
||||
('aten::acosh', datetime.date(9999, 1, 1)),
|
||||
('aten::atanh', datetime.date(9999, 1, 1)),
|
||||
('aten::asinh', datetime.date(9999, 1, 1)),
|
||||
('aten::floordiv', datetime.date(9999, 1, 1)),
|
||||
('aten::sorted', datetime.date(9999, 1, 1)),
|
||||
('aten::__contains__', datetime.date(9999, 1, 1)),
|
||||
('aten::count', datetime.date(9999, 1, 1)),
|
||||
('aten::remove', datetime.date(9999, 1, 1)),
|
||||
('aten::pop', datetime.date(9999, 1, 1)),
|
||||
('aten::insert', datetime.date(9999, 1, 1)),
|
||||
('aten::clear', datetime.date(9999, 1, 1)),
|
||||
('aten::copy', datetime.date(9999, 1, 1)),
|
||||
('aten::extend', datetime.date(9999, 1, 1)),
|
||||
('aten::reverse', datetime.date(9999, 1, 1)),
|
||||
('aten::append', datetime.date(9999, 1, 1)),
|
||||
('aten::list', datetime.date(9999, 1, 1)),
|
||||
('aten::__getitem__', datetime.date(9999, 1, 1)),
|
||||
('aten::len', datetime.date(9999, 1, 1)),
|
||||
('aten::backward', datetime.date(9999, 1, 1)),
|
||||
('aten::Float', datetime.date(9999, 1, 1)),
|
||||
('aten::Int', datetime.date(9999, 1, 1)),
|
||||
('aten::Bool', datetime.date(9999, 1, 1)),
|
||||
('aten::_ncf_view', datetime.date(9999, 1, 1)),
|
||||
('aten::_ncf_unsqueeze', datetime.date(9999, 1, 1)),
|
||||
('quantized::mul_scalar_relu_out', datetime.date(9999, 1, 1)),
|
||||
('quantized::mul_scalar_out', datetime.date(9999, 1, 1)),
|
||||
('quantized::mul_relu_out', datetime.date(9999, 1, 1)),
|
||||
('quantized::mul_out', datetime.date(9999, 1, 1)),
|
||||
('aten::tan', datetime.date(9999, 1, 1)),
|
||||
('aten::sub', datetime.date(9999, 1, 1)),
|
||||
('aten::sqrt', datetime.date(9999, 1, 1)),
|
||||
('aten::sort', datetime.date(9999, 1, 1)),
|
||||
('aten::slice', datetime.date(9999, 1, 1)),
|
||||
('aten::sinh', datetime.date(9999, 1, 1)),
|
||||
('aten::sin', datetime.date(9999, 1, 1)),
|
||||
('aten::round', datetime.date(9999, 1, 1)),
|
||||
('aten::remainder', datetime.date(9999, 1, 1)),
|
||||
('aten::full_like', datetime.date(9999, 1, 1)),
|
||||
('aten::real', datetime.date(9999, 1, 1)),
|
||||
('aten::randn_like', datetime.date(9999, 1, 1)),
|
||||
('aten::pow', datetime.date(9999, 1, 1)),
|
||||
('aten::floor', datetime.date(9999, 1, 1)),
|
||||
('quantized::cat_relu_out', datetime.date(9999, 1, 1)),
|
||||
('quantized::cat_out', datetime.date(9999, 1, 1)),
|
||||
('aten::neg', datetime.date(9999, 1, 1)),
|
||||
('quantized::add_out', datetime.date(9999, 1, 1)),
|
||||
('aten::expm1', datetime.date(9999, 1, 1)),
|
||||
('aten::ceil', datetime.date(9999, 1, 1)),
|
||||
('aten::add', datetime.date(9999, 1, 1)),
|
||||
('aten::acos', datetime.date(9999, 1, 1)),
|
||||
('aten::cudnn_convolution', datetime.date(9999, 1, 1)),
|
||||
('aten::cudnn_convolution_backward', datetime.date(9999, 1, 1)),
|
||||
('aten::cudnn_convolution_transpose', datetime.date(9999, 1, 1)),
|
||||
('aten::cudnn_convolution_transpose_backward', datetime.date(9999, 1, 1)),
|
||||
('aten::cudnn_convolution_backward_bias', datetime.date(9999, 1, 1)),
|
||||
('aten::cudnn_convolution_transpose_backward_bias', datetime.date(9999, 1, 1)),
|
||||
('aten::atan', datetime.date(9999, 1, 1)),
|
||||
('aten::log10', datetime.date(9999, 1, 1)),
|
||||
('quantized::add_scalar_out', datetime.date(9999, 1, 1)),
|
||||
('quantized::add_scalar_relu_out', datetime.date(9999, 1, 1)),
|
||||
('quantized::add_relu_out', datetime.date(9999, 1, 1)),
|
||||
('aten::exp', datetime.date(9999, 1, 1)),
|
||||
('aten::cosh', datetime.date(9999, 1, 1)),
|
||||
('aten::erf', datetime.date(9999, 1, 1)),
|
||||
('aten::imag', datetime.date(9999, 1, 1)),
|
||||
('aten::empty_like', datetime.date(9999, 1, 1)),
|
||||
('aten::eq', datetime.date(9999, 1, 1)),
|
||||
('aten::index', datetime.date(9999, 1, 1)),
|
||||
('aten::isfinite', datetime.date(9999, 1, 1)),
|
||||
('aten::leaky_relu_backward', datetime.date(9999, 1, 1)),
|
||||
('aten::lgamma', datetime.date(9999, 1, 1)),
|
||||
('aten::log1p', datetime.date(9999, 1, 1)),
|
||||
('aten::asin', datetime.date(9999, 1, 1)),
|
||||
('aten::cos', datetime.date(9999, 1, 1)),
|
||||
('aten::log', datetime.date(9999, 1, 1)),
|
||||
('aten::mul', datetime.date(9999, 1, 1)),
|
||||
('aten::ne', datetime.date(9999, 1, 1)),
|
||||
('aten::rand_like', datetime.date(9999, 1, 1)),
|
||||
('aten::randint_like', datetime.date(9999, 1, 1)),
|
||||
('aten::rrelu_with_noise_backward', datetime.date(9999, 1, 1)),
|
||||
('aten::select', datetime.date(9999, 1, 1)),
|
||||
('aten::tanh', datetime.date(9999, 1, 1)),
|
||||
('aten::add_', datetime.date(9999, 1, 1)),
|
||||
('aten::ones_like', datetime.date(9999, 1, 1)),
|
||||
('aten::to', datetime.date(9999, 1, 1)),
|
||||
('aten::zeros_like', datetime.date(9999, 1, 1)),
|
||||
]
|
||||
|
||||
|
||||
@ -162,6 +171,15 @@ def check_bc(new_schema_dict):
|
||||
return is_bc
|
||||
|
||||
|
||||
blacklist = [
|
||||
"torch.classes",
|
||||
"Any",
|
||||
"RRef",
|
||||
"aten::setdefault",
|
||||
"aten::_set_item",
|
||||
]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Process some integers.')
|
||||
parser.add_argument(
|
||||
@ -176,6 +194,9 @@ if __name__ == '__main__':
|
||||
line = f.readline()
|
||||
if not line:
|
||||
break
|
||||
if any(w for w in blacklist if w in line):
|
||||
# TODO Fix type __torch__.torch.classes.xxx
|
||||
continue
|
||||
|
||||
s = parse_schema(line.strip())
|
||||
slist = new_schema_dict.get(s.name, [])
|
||||
|
@ -293,7 +293,7 @@ TEST_F(FunctionalTest, MultiLabelSoftMarginLossWeightedNoReduction) {
|
||||
auto input = torch::tensor({{0., 2., 2., 0.}, {2., 1., 0., 1.}}, torch::dtype(torch::kFloat).requires_grad(true));
|
||||
auto target = torch::tensor({{0., 0., 1., 0.}, {1., 0., 1., 1.}}, torch::kFloat);
|
||||
auto weight = torch::tensor({0.1, 0.6, 0.4, 0.8}, torch::kFloat);
|
||||
auto options = F::MultiLabelSoftMarginLossFuncOptions().reduction(torch::kNone).weight(weight);
|
||||
auto options = F::MultilabelSoftMarginLossFuncOptions().reduction(torch::kNone).weight(weight);
|
||||
auto output =
|
||||
F::multilabel_soft_margin_loss(input, target, options);
|
||||
auto expected = torch::tensor({0.4876902, 0.3321295}, torch::kFloat);
|
||||
@ -1875,7 +1875,7 @@ TEST_F(FunctionalTest, Interpolate) {
|
||||
// 1D interpolation
|
||||
auto input = torch::ones({1, 1, 2});
|
||||
auto options = F::InterpolateFuncOptions()
|
||||
.size({4})
|
||||
.size(std::vector<int64_t>({4}))
|
||||
.mode(torch::kNearest);
|
||||
auto output = F::interpolate(input, options);
|
||||
auto expected = torch::ones({1, 1, 4});
|
||||
@ -1889,7 +1889,7 @@ TEST_F(FunctionalTest, Interpolate) {
|
||||
for (const auto scale_factor : {0.5, 1.5, 2.0}) {
|
||||
auto input = torch::ones({1, 1, 2, 2});
|
||||
auto options = F::InterpolateFuncOptions()
|
||||
.scale_factor({scale_factor, scale_factor})
|
||||
.scale_factor(std::vector<double>({scale_factor, scale_factor}))
|
||||
.mode(torch::kBilinear)
|
||||
.align_corners(align_corners);
|
||||
auto output = F::interpolate(input, options);
|
||||
@ -1908,7 +1908,7 @@ TEST_F(FunctionalTest, Interpolate) {
|
||||
auto input = torch::ones({1, 1, 2, 2, 2});
|
||||
auto options =
|
||||
F::InterpolateFuncOptions()
|
||||
.scale_factor({scale_factor, scale_factor, scale_factor})
|
||||
.scale_factor(std::vector<double>({scale_factor, scale_factor, scale_factor}))
|
||||
.mode(torch::kTrilinear)
|
||||
.align_corners(align_corners);
|
||||
auto output = F::interpolate(input, options);
|
||||
@ -1924,13 +1924,13 @@ TEST_F(FunctionalTest, Interpolate) {
|
||||
{
|
||||
auto input = torch::randn({3, 2, 2});
|
||||
ASSERT_THROWS_WITH(
|
||||
F::interpolate(input[0], F::InterpolateFuncOptions().size({4, 4})),
|
||||
F::interpolate(input[0], F::InterpolateFuncOptions().size(std::vector<int64_t>({4, 4}))),
|
||||
"Input Error: Only 3D, 4D and 5D input Tensors supported (got 2D) "
|
||||
"for the modes: nearest | linear | bilinear | bicubic | trilinear (got kNearest)");
|
||||
ASSERT_THROWS_WITH(
|
||||
F::interpolate(
|
||||
torch::reshape(input, {1, 1, 1, 3, 2, 2}),
|
||||
F::InterpolateFuncOptions().size({1, 1, 1, 3, 4, 4})),
|
||||
F::InterpolateFuncOptions().size(std::vector<int64_t>({1, 1, 1, 3, 4, 4}))),
|
||||
"Input Error: Only 3D, 4D and 5D input Tensors supported (got 6D) "
|
||||
"for the modes: nearest | linear | bilinear | bicubic | trilinear (got kNearest)");
|
||||
ASSERT_THROWS_WITH(
|
||||
@ -1939,12 +1939,12 @@ TEST_F(FunctionalTest, Interpolate) {
|
||||
ASSERT_THROWS_WITH(
|
||||
F::interpolate(
|
||||
input,
|
||||
F::InterpolateFuncOptions().size({3, 4, 4}).scale_factor({0.5})),
|
||||
F::InterpolateFuncOptions().size(std::vector<int64_t>({3, 4, 4})).scale_factor(std::vector<double>({0.5}))),
|
||||
"only one of size or scale_factor should be defined");
|
||||
ASSERT_THROWS_WITH(
|
||||
F::interpolate(input, F::InterpolateFuncOptions().scale_factor({3, 2})),
|
||||
F::interpolate(input, F::InterpolateFuncOptions().scale_factor(std::vector<double>({3, 2}))),
|
||||
"scale_factor shape must match input shape. "
|
||||
"Input is 1D, scale_factor size is 2");
|
||||
"Input is 1D, scale_factor size is [3, 2]");
|
||||
ASSERT_THROWS_WITH(
|
||||
F::interpolate(
|
||||
input,
|
||||
@ -2328,9 +2328,15 @@ TEST_F(FunctionalTest, AlphaDropout) {
|
||||
auto input_std = input.std();
|
||||
|
||||
for (const auto rate : {0.2, 0.5, 0.8}) {
|
||||
auto output = F::alpha_dropout(input, F::AlphaDropoutFuncOptions().p(rate).training(false));
|
||||
ASSERT_TRUE(torch::allclose(input_mean, output.mean(), 0.1));
|
||||
ASSERT_TRUE(torch::allclose(input_std, output.std(), 0.1));
|
||||
for (const auto inplace : {false, true}) {
|
||||
auto input_ = input.clone();
|
||||
auto output = F::alpha_dropout(input_, F::AlphaDropoutFuncOptions().p(rate).training(false).inplace(inplace));
|
||||
ASSERT_TRUE(torch::allclose(input_mean, output.mean(), 0.1));
|
||||
ASSERT_TRUE(torch::allclose(input_std, output.std(), 0.1));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(input_, output));
|
||||
}
|
||||
}
|
||||
}
|
||||
auto output = F::detail::alpha_dropout(input, 0.5, false, false);
|
||||
ASSERT_TRUE(torch::allclose(input_mean, output.mean(), 0.1));
|
||||
@ -2343,9 +2349,15 @@ TEST_F(FunctionalTest, FeatureAlphaDropout) {
|
||||
auto input_std = input.std();
|
||||
|
||||
for (const auto rate : {0.2, 0.5, 0.8}) {
|
||||
auto output = F::feature_alpha_dropout(input, F::FeatureAlphaDropoutFuncOptions().p(rate).training(false));
|
||||
ASSERT_TRUE(torch::allclose(input_mean, output.mean(), 0.1));
|
||||
ASSERT_TRUE(torch::allclose(input_std, output.std(), 0.1));
|
||||
for (const auto inplace : {false, true}) {
|
||||
auto input_ = input.clone();
|
||||
auto output = F::feature_alpha_dropout(input_, F::FeatureAlphaDropoutFuncOptions().p(rate).training(false).inplace(inplace));
|
||||
ASSERT_TRUE(torch::allclose(input_mean, output.mean(), 0.1));
|
||||
ASSERT_TRUE(torch::allclose(input_std, output.std(), 0.1));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(input_, output));
|
||||
}
|
||||
}
|
||||
}
|
||||
auto output = F::feature_alpha_dropout(input);
|
||||
ASSERT_TRUE(torch::allclose(input_mean, output.mean(), 0.1));
|
||||
|
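[Illustrative sketch, not part of the diff] The AlphaDropout/FeatureAlphaDropout hunks above add coverage for the new `inplace` flag on the functional options. A minimal standalone example of that call pattern, assuming only the libtorch API that appears in the diff itself:

#include <torch/torch.h>

int main() {
  namespace F = torch::nn::functional;
  auto input = torch::randn({5000});
  auto work = input.clone();  // keep an untouched copy for comparison
  // With training(false), alpha_dropout is the identity; inplace(true) writes
  // the result back into `work`, so the output aliases it.
  auto out = F::alpha_dropout(
      work,
      F::AlphaDropoutFuncOptions().p(0.5).training(false).inplace(true));
  TORCH_CHECK(torch::allclose(out, input));
  TORCH_CHECK(torch::allclose(work, out));
}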
@ -1300,54 +1300,81 @@ TEST_F(ModulesTest, FeatureAlphaDropout) {
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, Dropout) {
|
||||
Dropout dropout(0.5);
|
||||
torch::Tensor x = torch::ones(100, torch::requires_grad());
|
||||
torch::Tensor y = dropout(x);
|
||||
for (const auto inplace : {false, true}) {
|
||||
Dropout dropout(DropoutOptions(0.5).inplace(inplace));
|
||||
torch::Tensor x = torch::ones(100);
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
torch::Tensor y = dropout(x);
|
||||
|
||||
y.backward(torch::ones_like(y));
|
||||
ASSERT_EQ(y.ndimension(), 1);
|
||||
ASSERT_EQ(y.size(0), 100);
|
||||
ASSERT_LT(y.sum().item<float>(), 130); // Probably
|
||||
ASSERT_GT(y.sum().item<float>(), 70); // Probably
|
||||
ASSERT_EQ(y.ndimension(), 1);
|
||||
ASSERT_EQ(y.size(0), 100);
|
||||
ASSERT_LT(y.sum().item<float>(), 130); // Probably
|
||||
ASSERT_GT(y.sum().item<float>(), 70); // Probably
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(y.allclose(x));
|
||||
} else {
|
||||
y.backward(torch::ones_like(y));
|
||||
}
|
||||
|
||||
dropout->eval();
|
||||
y = dropout(x);
|
||||
ASSERT_EQ(y.sum().item<float>(), 100);
|
||||
dropout->eval();
|
||||
y = dropout(torch::ones(100));
|
||||
ASSERT_EQ(y.sum().item<float>(), 100);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, Dropout2d) {
|
||||
Dropout2d dropout(0.5);
|
||||
torch::Tensor x = torch::ones({10, 10}, torch::requires_grad());
|
||||
torch::Tensor y = dropout(x);
|
||||
for (const auto inplace : {false, true}) {
|
||||
Dropout2d dropout(Dropout2dOptions(0.5).inplace(inplace));
|
||||
torch::Tensor x = torch::ones({10, 10});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
torch::Tensor y = dropout(x);
|
||||
|
||||
y.backward(torch::ones_like(y));
|
||||
ASSERT_EQ(y.ndimension(), 2);
|
||||
ASSERT_EQ(y.size(0), 10);
|
||||
ASSERT_EQ(y.size(1), 10);
|
||||
ASSERT_LT(y.sum().item<float>(), 130); // Probably
|
||||
ASSERT_GT(y.sum().item<float>(), 70); // Probably
|
||||
ASSERT_EQ(y.ndimension(), 2);
|
||||
ASSERT_EQ(y.size(0), 10);
|
||||
ASSERT_EQ(y.size(1), 10);
|
||||
ASSERT_LT(y.sum().item<float>(), 130); // Probably
|
||||
ASSERT_GT(y.sum().item<float>(), 70); // Probably
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(y.allclose(x));
|
||||
} else {
|
||||
y.backward(torch::ones_like(y));
|
||||
}
|
||||
|
||||
dropout->eval();
|
||||
y = dropout(x);
|
||||
ASSERT_EQ(y.sum().item<float>(), 100);
|
||||
dropout->eval();
|
||||
y = dropout(torch::ones({10, 10}));
|
||||
ASSERT_EQ(y.sum().item<float>(), 100);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, Dropout3d) {
|
||||
Dropout3d dropout(0.5);
|
||||
torch::Tensor x = torch::ones({4, 5, 5}, torch::requires_grad());
|
||||
torch::Tensor y = dropout(x);
|
||||
for (const auto inplace : {false, true}) {
|
||||
Dropout3d dropout(Dropout3dOptions(0.5).inplace(inplace));
|
||||
torch::Tensor x = torch::ones({4, 5, 5});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
torch::Tensor y = dropout(x);
|
||||
|
||||
y.backward(torch::ones_like(y));
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.size(0), 4);
|
||||
ASSERT_EQ(y.size(1), 5);
|
||||
ASSERT_EQ(y.size(1), 5);
|
||||
ASSERT_LT(y.sum().item<float>(), 130); // Probably
|
||||
ASSERT_GT(y.sum().item<float>(), 70); // Probably
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.size(0), 4);
|
||||
ASSERT_EQ(y.size(1), 5);
|
||||
ASSERT_EQ(y.size(1), 5);
|
||||
ASSERT_LT(y.sum().item<float>(), 130); // Probably
|
||||
ASSERT_GT(y.sum().item<float>(), 70); // Probably
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(y.allclose(x));
|
||||
} else {
|
||||
y.backward(torch::ones_like(y));
|
||||
}
|
||||
|
||||
dropout->eval();
|
||||
y = dropout(x);
|
||||
ASSERT_EQ(y.sum().item<float>(), 100);
|
||||
dropout->eval();
|
||||
y = dropout(torch::ones({4, 5, 5}));
|
||||
ASSERT_EQ(y.sum().item<float>(), 100);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, Parameters) {
|
||||
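[Illustrative sketch, not part of the diff] The reworked Dropout/Dropout2d/Dropout3d module tests loop over the new `inplace` option and re-check eval() behaviour on a fresh tensor. A minimal example of the eval-mode identity property, assuming the DropoutOptions API used in the diff:

#include <torch/torch.h>

int main() {
  // Dropout configured in-place, as in the updated test.
  torch::nn::Dropout dropout(torch::nn::DropoutOptions(0.5).inplace(true));
  dropout->eval();  // in eval mode dropout is a no-op
  auto x = torch::ones({10, 10});
  auto y = dropout(x);
  TORCH_CHECK(y.sum().item<float>() == 100);  // nothing was zeroed out
}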
@ -2147,38 +2174,58 @@ TEST_F(ModulesTest, PairwiseDistance) {
|
||||
TEST_F(ModulesTest, ELU) {
|
||||
const auto size = 3;
|
||||
for (const auto alpha : {0.0, 0.42, 1.0, 4.2, 42.42}) {
|
||||
ELU model {ELUOptions().alpha(alpha)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
ELU model {ELUOptions().alpha(alpha).inplace(inplace)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = torch::max(torch::zeros_like(x), x) +
|
||||
torch::min(torch::zeros_like(x), alpha * (torch::exp(x) - 1.0));
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = torch::max(torch::zeros_like(x_orig), x_orig) +
|
||||
torch::min(torch::zeros_like(x_orig), alpha * (torch::exp(x_orig) - 1.0));
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, SELU) {
|
||||
SELU model;
|
||||
auto input = torch::randn({5, 5}, torch::requires_grad());
|
||||
auto output = model->forward(input);
|
||||
const double scale = 1.0507009873554804934193349852946;
|
||||
const double alpha = 1.6732632423543772848170429916717;
|
||||
auto zero = torch::zeros_like(input);
|
||||
auto expected = scale *
|
||||
(torch::max(zero, input) +
|
||||
torch::min(zero, alpha * (torch::exp(input) - 1)));
|
||||
auto s = output.sum();
|
||||
s.backward();
|
||||
for (const auto inplace : {false, true}) {
|
||||
SELU model(inplace);
|
||||
auto input = torch::randn({5, 5});
|
||||
if (!inplace) {
|
||||
input.requires_grad_(true);
|
||||
}
|
||||
auto input_orig = input.clone();
|
||||
auto output = model->forward(input);
|
||||
const double scale = 1.0507009873554804934193349852946;
|
||||
const double alpha = 1.6732632423543772848170429916717;
|
||||
auto zero = torch::zeros_like(input);
|
||||
auto expected = scale *
|
||||
(torch::max(zero, input_orig) +
|
||||
torch::min(zero, alpha * (torch::exp(input_orig) - 1)));
|
||||
auto s = output.sum();
|
||||
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_TRUE(output.allclose(expected));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_TRUE(output.allclose(expected));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(input.allclose(expected));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, Hardshrink) {
|
||||
@ -2192,7 +2239,6 @@ TEST_F(ModulesTest, Hardshrink) {
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x.abs() > lambda) * x;
|
||||
@ -2204,21 +2250,30 @@ TEST_F(ModulesTest, Hardtanh) {
|
||||
const auto size = 3;
|
||||
for (const auto min_val : {-4.2, -1.0, -0.42, 0.0}) {
|
||||
for (const auto max_val : {0.42, 1.0, 4.2}) {
|
||||
Hardtanh model {HardtanhOptions().min_val(min_val).max_val(max_val)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
Hardtanh model {HardtanhOptions().min_val(min_val).max_val(max_val).inplace(inplace)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x < min_val) * min_val +
|
||||
((x >= min_val) * (x <= max_val)) * x +
|
||||
(x > max_val) * max_val;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x_orig < min_val) * min_val +
|
||||
((x_orig >= min_val) * (x_orig <= max_val)) * x_orig +
|
||||
(x_orig > max_val) * max_val;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2238,20 +2293,29 @@ TEST_F(ModulesTest, HardtanhMinValGEMaxVal) {
|
||||
|
||||
TEST_F(ModulesTest, LeakyReLU) {
|
||||
const auto size = 3;
|
||||
for (const auto negative_slope : {0.0, 0.42, 1.0}) {
|
||||
LeakyReLU model {LeakyReLUOptions().negative_slope(negative_slope)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
for (const auto negative_slope : {0.0, 0.42, 1.0}) {
|
||||
LeakyReLU model {LeakyReLUOptions().negative_slope(negative_slope).inplace(inplace)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x < 0) * x * negative_slope + (x >= 0) * x;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x_orig < 0) * x_orig * negative_slope + (x_orig >= 0) * x_orig;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2394,78 +2458,114 @@ TEST_F(ModulesTest, PReLU) {
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, ReLU) {
|
||||
const auto size = 3;
|
||||
ReLU model;
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
const auto size = 3;
|
||||
ReLU model(inplace);
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x < 0) * 0 + (x >= 0) * x;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x_orig < 0) * 0 + (x_orig >= 0) * x_orig;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, ReLU6) {
|
||||
const auto size = 3;
|
||||
ReLU6 model;
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
const auto size = 3;
|
||||
ReLU6 model(inplace);
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x < 0) * 0 + ((x >= 0) * (x <= 6)) * x + (x > 6) * 6;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = (x_orig < 0) * 0 + ((x_orig >= 0) * (x_orig <= 6)) * x_orig + (x_orig > 6) * 6;
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, RReLU) {
|
||||
const auto size = 3;
|
||||
for (const auto lower : {0.01, 0.1, 0.2}) {
|
||||
for (const auto upper : {0.3, 0.4, 0.5}) {
|
||||
RReLU model {RReLUOptions().lower(lower).upper(upper)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
RReLU model {RReLUOptions().lower(lower).upper(upper).inplace(inplace)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto z = ((x >= 0) * (x == y) +
|
||||
(x < 0) * (y >= x * upper) * (y <= lower * x)) * 1.0;
|
||||
ASSERT_TRUE(torch::allclose(z, torch::ones_like(z)));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto z = ((x_orig >= 0) * (x_orig == y) +
|
||||
(x_orig < 0) * (y >= x_orig * upper) * (y <= lower * x_orig)) * 1.0;
|
||||
ASSERT_TRUE(torch::allclose(z, torch::ones_like(z)));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, CELU) {
|
||||
const auto size = 3;
|
||||
for (const auto alpha : {0.42, 1.0, 4.2, 42.42}) {
|
||||
CELU model {CELUOptions().alpha(alpha)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size}).set_requires_grad(true);
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
for (const auto inplace : {false, true}) {
|
||||
for (const auto alpha : {0.42, 1.0, 4.2, 42.42}) {
|
||||
CELU model {CELUOptions().alpha(alpha).inplace(inplace)};
|
||||
auto x = torch::linspace(-10.0, 10.0, size * size * size);
|
||||
x.resize_({size, size, size});
|
||||
if (!inplace) {
|
||||
x.requires_grad_(true);
|
||||
}
|
||||
auto x_orig = x.clone();
|
||||
auto y = model(x);
|
||||
torch::Tensor s = y.sum();
|
||||
|
||||
s.backward();
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = torch::max(torch::zeros_like(x), x) +
|
||||
torch::min(torch::zeros_like(x), alpha * (torch::exp(x / alpha) - 1.0));
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
ASSERT_EQ(s.ndimension(), 0);
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
auto y_exp = torch::max(torch::zeros_like(x_orig), x_orig) +
|
||||
torch::min(torch::zeros_like(x_orig), alpha * (torch::exp(x_orig / alpha) - 1.0));
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
} else {
|
||||
s.backward();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
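[Illustrative sketch, not part of the diff] A recurring pattern in the activation-module hunks is cloning the input before the call, so that the expected output can still be computed from pre-activation values once inplace(true) has overwritten the input. A minimal example with ReLU, assuming the bool `inplace` constructor used in the diff:

#include <torch/torch.h>

int main() {
  torch::nn::ReLU model(/*inplace=*/true);
  auto x = torch::linspace(-10.0, 10.0, 27).reshape({3, 3, 3});
  auto x_orig = x.clone();   // snapshot before the in-place op
  auto y = model(x);         // overwrites x
  auto y_exp = (x_orig < 0) * 0 + (x_orig >= 0) * x_orig;
  TORCH_CHECK(torch::allclose(y, y_exp));
  TORCH_CHECK(torch::allclose(x, y_exp));  // x itself was rewritten
}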
@ -2597,12 +2697,16 @@ TEST_F(ModulesTest, Threshold) {
|
||||
Threshold model {ThresholdOptions(threshold, value).inplace(inplace)};
|
||||
auto x = torch::linspace(-3.0, 3.0, 61);
|
||||
x.resize_({size, size, size});
|
||||
auto y_exp = (x <= threshold) * value + (x > threshold) * x;
|
||||
auto x_orig = x.clone();
|
||||
auto y_exp = (x_orig <= threshold) * value + (x_orig > threshold) * x_orig;
|
||||
auto y = model(x);
|
||||
|
||||
ASSERT_EQ(y.ndimension(), 3);
|
||||
ASSERT_EQ(y.sizes(), std::vector<int64_t>({size, size, size}));
|
||||
ASSERT_TRUE(torch::allclose(y, y_exp));
|
||||
if (inplace) {
|
||||
ASSERT_TRUE(torch::allclose(x, y_exp));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2611,7 +2715,7 @@ TEST_F(ModulesTest, Threshold) {
|
||||
TEST_F(ModulesTest, Upsampling1D) {
|
||||
{
|
||||
Upsample model(UpsampleOptions()
|
||||
.size({4})
|
||||
.size(std::vector<int64_t>({4}))
|
||||
.mode(torch::kNearest));
|
||||
auto input = torch::ones({1, 1, 2}, torch::requires_grad());
|
||||
auto output = model->forward(input);
|
||||
@ -2627,7 +2731,7 @@ TEST_F(ModulesTest, Upsampling1D) {
|
||||
// test float scale factor up & down sampling
|
||||
for (const auto scale_factor : {0.5, 1.5, 2.0}) {
|
||||
Upsample model(UpsampleOptions()
|
||||
.scale_factor({scale_factor})
|
||||
.scale_factor(std::vector<double>({scale_factor}))
|
||||
.mode(torch::kLinear)
|
||||
.align_corners(align_corners));
|
||||
auto input = torch::ones({1, 1, 2}, torch::requires_grad());
|
||||
@ -2646,7 +2750,7 @@ TEST_F(ModulesTest, Upsampling1D) {
|
||||
{
|
||||
// linear (1D) upsampling spatial invariance
|
||||
Upsample model(UpsampleOptions()
|
||||
.scale_factor({3})
|
||||
.scale_factor(std::vector<double>({3}))
|
||||
.mode(torch::kLinear)
|
||||
.align_corners(false));
|
||||
auto input = torch::zeros({1, 1, 9});
|
||||
@ -2661,7 +2765,7 @@ TEST_F(ModulesTest, Upsampling1D) {
|
||||
TEST_F(ModulesTest, Upsampling2D) {
|
||||
{
|
||||
Upsample model(UpsampleOptions()
|
||||
.size({4, 4})
|
||||
.size(std::vector<int64_t>({4, 4}))
|
||||
.mode(torch::kNearest));
|
||||
auto input = torch::ones({1, 1, 2, 2}, torch::requires_grad());
|
||||
auto output = model->forward(input);
|
||||
@ -2677,7 +2781,7 @@ TEST_F(ModulesTest, Upsampling2D) {
|
||||
// test float scale factor up & down sampling
|
||||
for (const auto scale_factor : {0.5, 1.5, 2.0}) {
|
||||
Upsample model(UpsampleOptions()
|
||||
.scale_factor({scale_factor, scale_factor})
|
||||
.scale_factor(std::vector<double>({scale_factor, scale_factor}))
|
||||
.mode(torch::kBilinear)
|
||||
.align_corners(align_corners));
|
||||
auto input = torch::ones({1, 1, 2, 2}, torch::requires_grad());
|
||||
@ -2698,7 +2802,7 @@ TEST_F(ModulesTest, Upsampling2D) {
|
||||
// test float scale factor up & down sampling
|
||||
for (const auto scale_factor : {0.5, 1.5, 2.0}) {
|
||||
Upsample model(UpsampleOptions()
|
||||
.scale_factor({scale_factor, scale_factor})
|
||||
.scale_factor(std::vector<double>({scale_factor, scale_factor}))
|
||||
.mode(torch::kBicubic)
|
||||
.align_corners(align_corners));
|
||||
auto input = torch::ones({1, 1, 2, 2}, torch::requires_grad());
|
||||
@ -2719,7 +2823,7 @@ TEST_F(ModulesTest, Upsampling2D) {
|
||||
TEST_F(ModulesTest, Upsampling3D) {
|
||||
{
|
||||
Upsample model(UpsampleOptions()
|
||||
.size({4, 4, 4})
|
||||
.size(std::vector<int64_t>({4, 4, 4}))
|
||||
.mode(torch::kNearest));
|
||||
auto input = torch::ones({1, 1, 2, 2, 2}, torch::requires_grad());
|
||||
auto output = model->forward(input);
|
||||
@ -2736,7 +2840,7 @@ TEST_F(ModulesTest, Upsampling3D) {
|
||||
for (const auto scale_factor : {0.5, 1.5, 2.0}) {
|
||||
Upsample model(
|
||||
UpsampleOptions()
|
||||
.scale_factor({scale_factor, scale_factor, scale_factor})
|
||||
.scale_factor(std::vector<double>({scale_factor, scale_factor, scale_factor}))
|
||||
.mode(torch::kTrilinear)
|
||||
.align_corners(align_corners));
|
||||
auto input = torch::ones({1, 1, 2, 2, 2}, torch::requires_grad());
|
||||
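[Illustrative sketch, not part of the diff] The Upsample and interpolate hunks consistently replace brace-initializer arguments with explicit std::vector values, since size and scale_factor now expect concrete vector types. A minimal example of constructing the options that way, assuming the UpsampleOptions API shown in the diff:

#include <torch/torch.h>

int main() {
  torch::nn::Upsample model(
      torch::nn::UpsampleOptions()
          .scale_factor(std::vector<double>({2.0, 2.0}))
          .mode(torch::kBilinear)
          .align_corners(false));
  auto input = torch::ones({1, 1, 2, 2});
  auto output = model->forward(input);
  // Scale factor 2 doubles both spatial dimensions: {1, 1, 2, 2} -> {1, 1, 4, 4}
  TORCH_CHECK(output.size(2) == 4 && output.size(3) == 4);
}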
@ -3876,10 +3980,10 @@ TEST_F(ModulesTest, PrettyPrintConvTranspose) {
|
||||
|
||||
TEST_F(ModulesTest, PrettyPrintUpsample) {
|
||||
ASSERT_EQ(
|
||||
c10::str(Upsample(UpsampleOptions().size({2, 4, 4}))),
|
||||
c10::str(Upsample(UpsampleOptions().size(std::vector<int64_t>({2, 4, 4})))),
|
||||
"torch::nn::Upsample(size=[2, 4, 4], mode=kNearest)");
|
||||
ASSERT_EQ(
|
||||
c10::str(Upsample(UpsampleOptions().scale_factor({0.5, 1.5}).mode(torch::kBilinear))),
|
||||
c10::str(Upsample(UpsampleOptions().scale_factor(std::vector<double>({0.5, 1.5})).mode(torch::kBilinear))),
|
||||
"torch::nn::Upsample(scale_factor=[0.5, 1.5], mode=kBilinear)");
|
||||
}
|
||||
|
||||
@ -3987,15 +4091,27 @@ TEST_F(ModulesTest, PrettyPrintAdaptiveMaxPool) {
|
||||
c10::str(AdaptiveMaxPool2d(5)),
|
||||
"torch::nn::AdaptiveMaxPool2d(output_size=[5, 5])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool2d(std::vector<int64_t>{5, 6})),
|
||||
c10::str(AdaptiveMaxPool2d(AdaptiveMaxPool2dOptions({5, 6}))),
|
||||
"torch::nn::AdaptiveMaxPool2d(output_size=[5, 6])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool2d(AdaptiveMaxPool2dOptions({5, c10::nullopt}))),
|
||||
"torch::nn::AdaptiveMaxPool2d(output_size=[5, None])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool2d(AdaptiveMaxPool2dOptions({c10::nullopt, c10::nullopt}))),
|
||||
"torch::nn::AdaptiveMaxPool2d(output_size=[None, None])");
|
||||
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool3d(5)),
|
||||
"torch::nn::AdaptiveMaxPool3d(output_size=[5, 5, 5])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool3d(std::vector<int64_t>{5, 6, 7})),
|
||||
c10::str(AdaptiveMaxPool3d(AdaptiveMaxPool3dOptions({5, 6, 7}))),
|
||||
"torch::nn::AdaptiveMaxPool3d(output_size=[5, 6, 7])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool3d(AdaptiveMaxPool3dOptions({5, c10::nullopt, 7}))),
|
||||
"torch::nn::AdaptiveMaxPool3d(output_size=[5, None, 7])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveMaxPool3d(AdaptiveMaxPool3dOptions({c10::nullopt, c10::nullopt, c10::nullopt}))),
|
||||
"torch::nn::AdaptiveMaxPool3d(output_size=[None, None, None])");
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, PrettyPrintAdaptiveAvgPool) {
|
||||
@ -4007,15 +4123,27 @@ TEST_F(ModulesTest, PrettyPrintAdaptiveAvgPool) {
|
||||
c10::str(AdaptiveAvgPool2d(5)),
|
||||
"torch::nn::AdaptiveAvgPool2d(output_size=[5, 5])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool2d(std::vector<int64_t>{5, 6})),
|
||||
c10::str(AdaptiveAvgPool2d(AdaptiveAvgPool2dOptions({5, 6}))),
|
||||
"torch::nn::AdaptiveAvgPool2d(output_size=[5, 6])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool2d(AdaptiveAvgPool2dOptions({5, c10::nullopt}))),
|
||||
"torch::nn::AdaptiveAvgPool2d(output_size=[5, None])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool2d(AdaptiveAvgPool2dOptions({c10::nullopt, c10::nullopt}))),
|
||||
"torch::nn::AdaptiveAvgPool2d(output_size=[None, None])");
|
||||
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool3d(5)),
|
||||
"torch::nn::AdaptiveAvgPool3d(output_size=[5, 5, 5])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool3d(std::vector<int64_t>{5, 6, 7})),
|
||||
c10::str(AdaptiveAvgPool3d(AdaptiveAvgPool3dOptions({5, 6, 7}))),
|
||||
"torch::nn::AdaptiveAvgPool3d(output_size=[5, 6, 7])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool3d(AdaptiveAvgPool3dOptions({5, c10::nullopt, 7}))),
|
||||
"torch::nn::AdaptiveAvgPool3d(output_size=[5, None, 7])");
|
||||
ASSERT_EQ(
|
||||
c10::str(AdaptiveAvgPool3d(AdaptiveAvgPool3dOptions({c10::nullopt, c10::nullopt, c10::nullopt}))),
|
||||
"torch::nn::AdaptiveAvgPool3d(output_size=[None, None, None])");
|
||||
}
|
||||
|
||||
TEST_F(ModulesTest, PrettyPrintMaxUnpool) {
|
||||
|
@ -26,7 +26,7 @@ bool test_optimizer_xor(Options options) {
|
||||
Linear(8, 1),
|
||||
Functional(torch::sigmoid));
|
||||
|
||||
const int64_t kBatchSize = 4;
|
||||
const int64_t kBatchSize = 200;
|
||||
const int64_t kMaximumNumberOfEpochs = 3000;
|
||||
|
||||
OptimizerClass optimizer(model->parameters(), options);
|
||||
@ -40,13 +40,21 @@ bool test_optimizer_xor(Options options) {
|
||||
inputs[i] = torch::randint(2, {2}, torch::kInt64);
|
||||
labels[i] = inputs[i][0].item<int64_t>() ^ inputs[i][1].item<int64_t>();
|
||||
}
|
||||
inputs.set_requires_grad(true);
|
||||
optimizer.zero_grad();
|
||||
auto x = model->forward(inputs);
|
||||
torch::Tensor loss = torch::binary_cross_entropy(x, labels);
|
||||
loss.backward();
|
||||
|
||||
optimizer.step();
|
||||
inputs.set_requires_grad(true);
|
||||
|
||||
auto step = [&](OptimizerClass& optimizer, Sequential model, torch::Tensor inputs, torch::Tensor labels) {
|
||||
auto closure = [&]() {
|
||||
optimizer.zero_grad();
|
||||
auto x = model->forward(inputs);
|
||||
auto loss = torch::binary_cross_entropy(x, labels);
|
||||
loss.backward();
|
||||
return loss;
|
||||
};
|
||||
return optimizer.step(closure);
|
||||
};
|
||||
|
||||
torch::Tensor loss = step(optimizer, model, inputs, labels);
|
||||
|
||||
running_loss = running_loss * 0.99 + loss.item<float>() * 0.01;
|
||||
if (epoch > kMaximumNumberOfEpochs) {
|
||||
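[Illustrative sketch, not part of the diff] The XOR test now routes every update through optimizer.step(closure), because optimizers such as LBFGS re-evaluate the loss themselves during a step. A minimal example of the closure pattern, assuming the optimizer API used in the diff; the Linear model and MSE loss here are placeholders:

#include <torch/torch.h>
#include <iostream>

int main() {
  auto model = torch::nn::Linear(2, 1);
  torch::optim::LBFGS optimizer(
      model->parameters(), torch::optim::LBFGSOptions(1.0));

  auto inputs = torch::rand({8, 2});
  auto targets = torch::rand({8, 1});

  // The optimizer calls the closure itself whenever it needs the loss
  // (and fresh gradients) re-evaluated; LBFGS may do so several times per step.
  auto closure = [&]() {
    optimizer.zero_grad();
    auto loss = torch::mse_loss(model->forward(inputs), targets);
    loss.backward();
    return loss;
  };
  torch::Tensor loss = optimizer.step(closure);
  std::cout << "loss after one LBFGS step: " << loss.item<float>() << std::endl;
}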
@ -166,30 +174,66 @@ TEST(OptimTest, OptimizerAccessors) {
|
||||
optimizer_.state();
|
||||
}
|
||||
|
||||
TEST(OptimTest, BasicInterface) {
|
||||
#define OLD_INTERFACE_WARNING_CHECK(func) \
|
||||
{ \
|
||||
std::stringstream buffer;\
|
||||
torch::test::CerrRedirect cerr_redirect(buffer.rdbuf());\
|
||||
func;\
|
||||
ASSERT_EQ(\
|
||||
torch::test::count_substr_occurrences(\
|
||||
buffer.str(),\
|
||||
"will be removed"\
|
||||
),\
|
||||
1);\
|
||||
}
|
||||
|
||||
struct MyOptimizerOptions : public OptimizerCloneableOptions<MyOptimizerOptions> {
|
||||
MyOptimizerOptions(double lr = 1.0) : lr_(lr) {};
|
||||
TORCH_ARG(double, lr) = 1.0;
|
||||
};
|
||||
|
||||
TEST(OptimTest, OldInterface) {
|
||||
struct MyOptimizer : Optimizer {
|
||||
using Optimizer::Optimizer;
|
||||
torch::Tensor step(LossClosure closure = nullptr) override { return {};}
|
||||
explicit MyOptimizer(
|
||||
std::vector<at::Tensor> params, MyOptimizerOptions defaults = {}) :
|
||||
Optimizer({std::move(OptimizerParamGroup(params))}, std::make_unique<MyOptimizerOptions>(defaults)) {}
|
||||
};
|
||||
std::vector<torch::Tensor> parameters = {
|
||||
torch::ones({2, 3}), torch::zeros({2, 3}), torch::rand({2, 3})};
|
||||
{
|
||||
MyOptimizer optimizer(parameters);
|
||||
ASSERT_EQ(optimizer.size(), parameters.size());
|
||||
size_t size;
|
||||
OLD_INTERFACE_WARNING_CHECK(size = optimizer.size());
|
||||
ASSERT_EQ(size, parameters.size());
|
||||
}
|
||||
{
|
||||
MyOptimizer optimizer;
|
||||
ASSERT_EQ(optimizer.size(), 0);
|
||||
optimizer.add_parameters(parameters);
|
||||
ASSERT_EQ(optimizer.size(), parameters.size());
|
||||
for (size_t p = 0; p < parameters.size(); ++p) {
|
||||
ASSERT_TRUE(optimizer.parameters()[p].allclose(parameters[p]));
|
||||
std::vector<at::Tensor> params;
|
||||
MyOptimizer optimizer(params);
|
||||
|
||||
size_t size;
|
||||
OLD_INTERFACE_WARNING_CHECK(size = optimizer.size());
|
||||
ASSERT_EQ(size, 0);
|
||||
|
||||
OLD_INTERFACE_WARNING_CHECK(optimizer.add_parameters(parameters));
|
||||
|
||||
OLD_INTERFACE_WARNING_CHECK(size = optimizer.size());
|
||||
ASSERT_EQ(size, parameters.size());
|
||||
|
||||
std::vector<torch::Tensor> params_;
|
||||
OLD_INTERFACE_WARNING_CHECK(params_ = optimizer.parameters());
|
||||
for (size_t p = 0; p < size; ++p) {
|
||||
ASSERT_TRUE(params_[p].allclose(parameters[p]));
|
||||
}
|
||||
}
|
||||
{
|
||||
Linear linear(3, 4);
|
||||
MyOptimizer optimizer(linear->parameters());
|
||||
ASSERT_EQ(optimizer.size(), linear->parameters().size());
|
||||
|
||||
size_t size;
|
||||
OLD_INTERFACE_WARNING_CHECK(size = optimizer.size());
|
||||
ASSERT_EQ(size, linear->parameters().size());
|
||||
}
|
||||
}
|
||||
|
||||
@ -198,6 +242,11 @@ TEST(OptimTest, XORConvergence_SGD) {
|
||||
SGDOptions(0.1).momentum(0.9).nesterov(true).weight_decay(1e-6)));
|
||||
}
|
||||
|
||||
TEST(OptimTest, XORConvergence_LBFGS) {
|
||||
ASSERT_TRUE(test_optimizer_xor<LBFGS>(LBFGSOptions(1.0)));
|
||||
ASSERT_TRUE(test_optimizer_xor<LBFGS>(LBFGSOptions(1.0).line_search_fn("strong_wolfe")));
|
||||
}
|
||||
|
||||
TEST(OptimTest, XORConvergence_Adagrad) {
|
||||
ASSERT_TRUE(test_optimizer_xor<Adagrad>(
|
||||
AdagradOptions(1.0).weight_decay(1e-6).lr_decay(1e-3)));
|
||||
@ -375,7 +424,7 @@ TEST(OptimTest, AddParameter_LBFGS) {
|
||||
}
|
||||
|
||||
LBFGS optimizer(std::vector<torch::Tensor>{}, 1.0);
|
||||
optimizer.add_parameters(parameters);
|
||||
OLD_INTERFACE_WARNING_CHECK(optimizer.add_parameters(parameters));
|
||||
|
||||
optimizer.step([]() { return torch::tensor(1); });
|
||||
|
||||
|
@ -64,7 +64,7 @@ void is_optimizer_state_equal(
|
||||
}
|
||||
|
||||
template <typename OptimizerClass, typename DerivedOptimizerOptions, typename DerivedOptimizerParamState>
|
||||
void test_serialize_optimizer(DerivedOptimizerOptions options) {
|
||||
void test_serialize_optimizer(DerivedOptimizerOptions options, bool only_has_global_state = false) {
|
||||
auto model1 = Linear(5, 2);
|
||||
auto model2 = Linear(5, 2);
|
||||
auto model3 = Linear(5, 2);
|
||||
@ -125,9 +125,11 @@ void test_serialize_optimizer(DerivedOptimizerOptions options) {
|
||||
auto& optim3_2_state = optim3_2.state();
|
||||
auto& optim3_state = optim3.state();
|
||||
|
||||
// optim3_2 and optim1 should have param_groups and state of size 1 and 2 respectively
|
||||
// optim3_2 and optim1 should have param_groups and state of size 1 and state_size respectively
|
||||
ASSERT_TRUE(optim3_2_param_groups.size() == 1);
|
||||
ASSERT_TRUE(optim3_2_state.size() == 2);
|
||||
// state_size = 2 for all optimizers except LBFGS as LBFGS only maintains one global state
|
||||
int state_size = only_has_global_state ? 1 : 2;
|
||||
ASSERT_TRUE(optim3_2_state.size() == state_size);
|
||||
|
||||
// optim3_2 and optim1 should have param_groups and state of same size
|
||||
ASSERT_TRUE(optim3_2_param_groups.size() == optim3_param_groups.size());
|
||||
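[Illustrative sketch, not part of the diff] test_serialize_optimizer round-trips an optimizer through torch::save / torch::load and then compares param_groups and state. A minimal example of that round trip with Adagrad, assuming the serialization calls used in the test:

#include <torch/torch.h>
#include <c10/util/tempfile.h>

int main() {
  auto model = torch::nn::Linear(5, 2);
  torch::optim::Adagrad optim1(
      model->parameters(), torch::optim::AdagradOptions(1.0));
  torch::optim::Adagrad optim2(
      model->parameters(), torch::optim::AdagradOptions(1.0));

  // Take one step so optim1 has per-parameter state worth serializing.
  optim1.zero_grad();
  model->forward(torch::ones({10, 5})).sum().backward();
  optim1.step();

  auto tmpfile = c10::make_tempfile();
  torch::save(optim1, tmpfile.name);  // writes param_groups and state
  torch::load(optim2, tmpfile.name);  // restores them into a fresh optimizer
  TORCH_CHECK(optim1.state().size() == optim2.state().size());
}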
@ -668,39 +670,16 @@ TEST(SerializeTest, Optim_RMSprop) {
|
||||
}
|
||||
|
||||
TEST(SerializeTest, Optim_LBFGS) {
|
||||
auto options = LBFGSOptions();
|
||||
test_serialize_optimizer<LBFGS, LBFGSOptions, LBFGSParamState>(LBFGSOptions(), true);
|
||||
// bc compatibility check
|
||||
auto model1 = Linear(5, 2);
|
||||
auto model2 = Linear(5, 2);
|
||||
auto model3 = Linear(5, 2);
|
||||
|
||||
// Models 1, 2, 3 will have the same parameters.
|
||||
auto model_tempfile = c10::make_tempfile();
|
||||
torch::save(model1, model_tempfile.name);
|
||||
torch::load(model2, model_tempfile.name);
|
||||
torch::load(model3, model_tempfile.name);
|
||||
|
||||
auto param1 = model1->named_parameters();
|
||||
auto param2 = model2->named_parameters();
|
||||
auto param3 = model3->named_parameters();
|
||||
for (const auto& p : param1) {
|
||||
ASSERT_TRUE(p->allclose(param2[p.key()]));
|
||||
ASSERT_TRUE(param2[p.key()].allclose(param3[p.key()]));
|
||||
}
|
||||
// Make some optimizers
|
||||
auto optim1 = LBFGS(
|
||||
{torch::optim::OptimizerParamGroup(model1->parameters())}, options);
|
||||
auto optim2 = LBFGS(
|
||||
model2->parameters(), options);
|
||||
auto optim2_2 = LBFGS(
|
||||
model2->parameters(), options);
|
||||
auto optim3 = LBFGS(
|
||||
model3->parameters(), options);
|
||||
auto optim3_2 = LBFGS(
|
||||
model3->parameters(), options);
|
||||
auto model1_params = model1->parameters();
|
||||
// added a tensor for lazy init check - when all params do not have entry in buffers
|
||||
model1_params.emplace_back(torch::randn({2,3}));
|
||||
auto optim1 = torch::optim::LBFGS(model1_params, torch::optim::LBFGSOptions());
|
||||
|
||||
auto x = torch::ones({10, 5});
|
||||
|
||||
auto step = [&x](torch::optim::LossClosureOptimizer& optimizer, Linear model) {
|
||||
auto step = [&x](torch::optim::Optimizer& optimizer, Linear model) {
|
||||
optimizer.zero_grad();
|
||||
auto y = model->forward(x).sum();
|
||||
y.backward();
|
||||
@ -708,56 +687,47 @@ TEST(SerializeTest, Optim_LBFGS) {
|
||||
optimizer.step(closure);
|
||||
};
|
||||
|
||||
// Do 2 steps of model1
|
||||
step(optim1, model1);
|
||||
step(optim1, model1);
|
||||
|
||||
// Do 2 steps of model 2 without saving the optimizer
|
||||
step(optim2, model2);
|
||||
step(optim2_2, model2);
|
||||
at::Tensor d, t, H_diag, prev_flat_grad, prev_loss;
|
||||
std::deque<at::Tensor> old_dirs, old_stps;
|
||||
|
||||
// Do 1 step of model 3
|
||||
step(optim3, model3);
|
||||
const auto& params_ = optim1.param_groups()[0].params();
|
||||
auto key_ = c10::guts::to_string(params_[0].unsafeGetTensorImpl());
|
||||
const auto& optim1_state = static_cast<const LBFGSParamState&>(*(optim1.state().at(key_).get()));
|
||||
d = optim1_state.d();
|
||||
t = at::tensor(optim1_state.t());
|
||||
H_diag = optim1_state.H_diag();
|
||||
prev_flat_grad = optim1_state.prev_flat_grad();
|
||||
prev_loss = at::tensor(optim1_state.prev_loss());
|
||||
old_dirs = optim1_state.old_dirs();
|
||||
|
||||
// save the optimizer
|
||||
auto optim_tempfile = c10::make_tempfile();
|
||||
torch::save(optim3, optim_tempfile.name);
|
||||
torch::load(optim3_2, optim_tempfile.name);
|
||||
// write buffers to the file
|
||||
auto optim_tempfile_old_format = c10::make_tempfile();
|
||||
torch::serialize::OutputArchive output_archive;
|
||||
output_archive.write("d", d, /*is_buffer=*/true);
|
||||
output_archive.write("t", t, /*is_buffer=*/true);
|
||||
output_archive.write("H_diag", H_diag, /*is_buffer=*/true);
|
||||
output_archive.write("prev_flat_grad", prev_flat_grad, /*is_buffer=*/true);
|
||||
output_archive.write("prev_loss", prev_loss, /*is_buffer=*/true);
|
||||
write_tensors_to_archive(output_archive, "old_dirs", old_dirs);
|
||||
write_tensors_to_archive(output_archive, "old_stps", old_stps);
|
||||
output_archive.save_to(optim_tempfile_old_format.name);
|
||||
|
||||
auto& optim3_2_param_groups = optim3_2.param_groups();
|
||||
auto& optim3_param_groups = optim3.param_groups();
|
||||
auto& optim3_2_state = optim3_2.state();
|
||||
auto& optim3_state = optim3.state();
|
||||
auto optim1_2 = LBFGS(model1_params, torch::optim::LBFGSOptions());
|
||||
OLD_SERIALIZATION_LOGIC_WARNING_CHECK(torch::load, optim1_2, optim_tempfile_old_format.name);
|
||||
|
||||
// LBFGS only supports 1 param_group
|
||||
// optim3_2 and optim1 should have param_groups of size 1
|
||||
ASSERT_TRUE(optim3_param_groups.size() == 1);
|
||||
ASSERT_TRUE(optim3_2_param_groups.size() == 1);
|
||||
// LBFGS only maintains one global state
|
||||
ASSERT_TRUE(optim3_2_state.size() == 1);
|
||||
ASSERT_TRUE(optim3_state.size() == 1);
|
||||
const auto& params1_2_ = optim1_2.param_groups()[0].params();
|
||||
auto param_key = c10::guts::to_string(params1_2_[0].unsafeGetTensorImpl());
|
||||
auto& optim1_2_state = static_cast<LBFGSParamState&>(*(optim1_2.state().at(param_key).get()));
|
||||
|
||||
// checking correctness of serialization logic for optimizer.param_groups_ and optimizer.state_
|
||||
for (int i = 0; i < optim3_2_param_groups.size(); i++) {
|
||||
is_optimizer_param_group_equal<LBFGSOptions>(
|
||||
optim3_2_param_groups[i], optim3_param_groups[i]);
|
||||
is_optimizer_state_equal<LBFGSParamState>(optim3_2_state, optim3_state);
|
||||
}
|
||||
// old LBFGS didn't track func_evals, n_iter, ro, al values
|
||||
optim1_2_state.func_evals(optim1_state.func_evals());
|
||||
optim1_2_state.n_iter(optim1_state.n_iter());
|
||||
optim1_2_state.ro(optim1_state.ro());
|
||||
optim1_2_state.al(optim1_state.al());
|
||||
|
||||
// Do step2 for model 3
|
||||
step(optim3_2, model3);
|
||||
|
||||
param1 = model1->named_parameters();
|
||||
param2 = model2->named_parameters();
|
||||
param3 = model3->named_parameters();
|
||||
for (const auto& p : param1) {
|
||||
const auto& name = p.key();
|
||||
// Model 1 and 3 should be the same
|
||||
ASSERT_TRUE(
|
||||
param1[name].norm().item<float>() == param3[name].norm().item<float>());
|
||||
ASSERT_TRUE(
|
||||
param1[name].norm().item<float>() != param2[name].norm().item<float>());
|
||||
}
|
||||
is_optimizer_state_equal<LBFGSParamState>(optim1.state(), optim1_2.state());
|
||||
}
|
||||
|
||||
TEST(SerializeTest, XOR_CUDA) {
|
||||
|
@ -138,7 +138,7 @@ void testClassDerive() {
|
||||
static const auto torchbindSrc = R"JIT(
|
||||
class FooBar1234(Module):
|
||||
__parameters__ = []
|
||||
f : __torch__.torch.classes._TorchScriptTesting_StackString
|
||||
f : __torch__.torch.classes._TorchScriptTesting._StackString
|
||||
training : bool
|
||||
def forward(self: __torch__.FooBar1234) -> str:
|
||||
return (self.f).top()
|
||||
|
@ -66,7 +66,7 @@ struct PickleTester : torch::CustomClassHolder {
|
||||
std::vector<int64_t> vals;
|
||||
};
|
||||
|
||||
static auto test = torch::class_<Foo>("_TorchScriptTesting_Foo")
|
||||
static auto test = torch::class_<Foo>("_TorchScriptTesting", "_Foo")
|
||||
.def(torch::init<int64_t, int64_t>())
|
||||
// .def(torch::init<>())
|
||||
.def("info", &Foo::info)
|
||||
@ -75,7 +75,9 @@ static auto test = torch::class_<Foo>("_TorchScriptTesting_Foo")
|
||||
.def("combine", &Foo::combine);
|
||||
|
||||
static auto testStack =
|
||||
torch::class_<MyStackClass<std::string>>("_TorchScriptTesting_StackString")
|
||||
torch::class_<MyStackClass<std::string>>(
|
||||
"_TorchScriptTesting",
|
||||
"_StackString")
|
||||
.def(torch::init<std::vector<std::string>>())
|
||||
.def("push", &MyStackClass<std::string>::push)
|
||||
.def("pop", &MyStackClass<std::string>::pop)
|
||||
@ -101,7 +103,7 @@ static auto testStack =
|
||||
// clang-format on
|
||||
|
||||
static auto testPickle =
|
||||
torch::class_<PickleTester>("_TorchScriptTesting_PickleTester")
|
||||
torch::class_<PickleTester>("_TorchScriptTesting", "_PickleTester")
|
||||
.def(torch::init<std::vector<int64_t>>())
|
||||
.def_pickle(
|
||||
[](c10::intrusive_ptr<PickleTester> self) { // __getstate__
|
||||
@ -127,10 +129,10 @@ at::Tensor take_an_instance(const c10::intrusive_ptr<PickleTester>& instance) {
|
||||
|
||||
torch::RegisterOperators& register_take_instance() {
|
||||
static auto instance_registry = torch::RegisterOperators().op(
|
||||
torch::RegisterOperators::options()
|
||||
.schema(
|
||||
"_TorchScriptTesting::take_an_instance(__torch__.torch.classes._TorchScriptTesting_PickleTester x) -> Tensor Y")
|
||||
.catchAllKernel<decltype(take_an_instance), &take_an_instance>());
|
||||
torch::RegisterOperators::options()
|
||||
.schema(
|
||||
"_TorchScriptTesting::take_an_instance(__torch__.torch.classes._TorchScriptTesting._PickleTester x) -> Tensor Y")
|
||||
.catchAllKernel<decltype(take_an_instance), &take_an_instance>());
|
||||
return instance_registry;
|
||||
}
|
||||
|
||||
@ -146,7 +148,7 @@ void testTorchbindIValueAPI() {
|
||||
auto custom_class_obj = make_custom_class<MyStackClass<std::string>>(
|
||||
std::vector<std::string>{"foo", "bar"});
|
||||
m.define(R"(
|
||||
def forward(self, s : __torch__.torch.classes._TorchScriptTesting_StackString):
|
||||
def forward(self, s : __torch__.torch.classes._TorchScriptTesting._StackString):
|
||||
return s.pop(), s
|
||||
)");
|
||||
|
||||
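[Illustrative sketch, not part of the diff] The TorchBind hunks move custom classes from a single mangled name to a (namespace, class name) pair, which is what surfaces as __torch__.torch.classes.<namespace>.<name> on the TorchScript side. A minimal registration example following that pattern; the MyCounter class and the _Example namespace are made up for illustration:

#include <torch/custom_class.h>

// Hypothetical example class, not from the PyTorch sources in this diff.
struct MyCounter : torch::CustomClassHolder {
  int64_t value;
  explicit MyCounter(int64_t start) : value(start) {}
  int64_t increment() { return ++value; }
};

// Registered under namespace "_Example" with class name "_MyCounter"; scripts
// then see it as __torch__.torch.classes._Example._MyCounter.
static auto register_my_counter =
    torch::class_<MyCounter>("_Example", "_MyCounter")
        .def(torch::init<int64_t>())
        .def("increment", &MyCounter::increment);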
|
@ -343,7 +343,8 @@ void testLiteInterpreterBuiltinFunction() {
|
||||
namespace {
|
||||
static auto reg =
|
||||
torch::jit::class_<TorchBindLiteInterpreterTestStruct>(
|
||||
"_TorchScriptTesting_LiteInterpreterTest")
|
||||
"_TorchScriptTesting",
|
||||
"_LiteInterpreterTest")
|
||||
.def("get", &TorchBindLiteInterpreterTestStruct::get)
|
||||
.def_pickle(
|
||||
// __getattr__
|
||||
|
@ -1,85 +0,0 @@
|
||||
from collections import namedtuple
|
||||
|
||||
TorchNNTestParams = namedtuple(
|
||||
'TorchNNTestParams',
|
||||
[
|
||||
'module_name',
|
||||
'module_variant_name',
|
||||
'test_instance',
|
||||
'cpp_constructor_args',
|
||||
'has_parity',
|
||||
'device',
|
||||
]
|
||||
)
|
||||
|
||||
CppArg = namedtuple('CppArg', ['type', 'value'])
|
||||
|
||||
ParityStatus = namedtuple('ParityStatus', ['has_impl_parity', 'has_doc_parity'])
|
||||
|
||||
TorchNNModuleMetadata = namedtuple(
|
||||
'TorchNNModuleMetadata',
|
||||
[
|
||||
'cpp_default_constructor_args',
|
||||
'num_attrs_recursive',
|
||||
'python_ignored_constructor_args',
|
||||
'python_ignored_attrs',
|
||||
'python_optional_attribute_to_jit_type',
|
||||
'cpp_sources',
|
||||
]
|
||||
)
|
||||
TorchNNModuleMetadata.__new__.__defaults__ = (None, None, [], [], {}, '')
|
||||
|
||||
'''
|
||||
This function expects the parity tracker Markdown file to have the following format:
|
||||
|
||||
```
|
||||
## package1_name
|
||||
|
||||
API | Implementation Parity | Doc Parity
|
||||
------------- | ------------- | -------------
|
||||
API_Name|No|No
|
||||
...
|
||||
|
||||
## package2_name
|
||||
|
||||
API | Implementation Parity | Doc Parity
|
||||
------------- | ------------- | -------------
|
||||
API_Name|No|No
|
||||
...
|
||||
```
|
||||
|
||||
The returned dict has the following format:
|
||||
|
||||
```
|
||||
Dict[package_name]
|
||||
-> Dict[api_name]
|
||||
-> ParityStatus
|
||||
```
|
||||
'''
|
||||
def parse_parity_tracker_table(file_path):
|
||||
def parse_parity_choice(str):
|
||||
if str in ['Yes', 'No']:
|
||||
return str == 'Yes'
|
||||
else:
|
||||
raise RuntimeError(
|
||||
'{} is not a supported parity choice. The valid choices are "Yes" and "No".'.format(str))
|
||||
|
||||
parity_tracker_dict = {}
|
||||
|
||||
with open(file_path, 'r') as f:
|
||||
all_text = f.read()
|
||||
packages = all_text.split('##')
|
||||
for package in packages[1:]:
|
||||
lines = [line.strip() for line in package.split('\n') if line.strip() != '']
|
||||
package_name = lines[0]
|
||||
if package_name in parity_tracker_dict:
|
||||
raise RuntimeError("Duplicated package name `{}` found in {}".format(package_name, file_path))
|
||||
else:
|
||||
parity_tracker_dict[package_name] = {}
|
||||
for api_status in lines[3:]:
|
||||
api_name, has_impl_parity_str, has_doc_parity_str = [x.strip() for x in api_status.split('|')]
|
||||
parity_tracker_dict[package_name][api_name] = ParityStatus(
|
||||
has_impl_parity=parse_parity_choice(has_impl_parity_str),
|
||||
has_doc_parity=parse_parity_choice(has_doc_parity_str))
|
||||
|
||||
return parity_tracker_dict
|
||||
|
test/cpp_api_parity/functional_impl_check.py (new file, 237 lines)
@@ -0,0 +1,237 @@
|
||||
# The purpose of this test is to check that we have implementation parity between
|
||||
# a Python `torch.nn.functional` function and its corresponding C++ `torch::nn::functional`
|
||||
# function. Concretely, this test does the following:
|
||||
#
|
||||
# 1. Get a test params dict from common_nn.py, run forward pass on the Python functional
|
||||
# created using the test params.
|
||||
#
|
||||
# 2. Serialize the Python functional's forward input arguments, deserialize them
|
||||
# in C++ and use them as input for the C++ functional's forward pass.
|
||||
#
|
||||
# 3. Run the forward pass on the C++ functional, and serialize the C++ functional's
|
||||
# forward output.
|
||||
#
|
||||
# 4. Compare Python/C++ functional's forward output. If they are the same, then we
|
||||
# have implementation parity between Python/C++ module.
|
||||
|
||||
import tempfile
|
||||
from string import Template
|
||||
import re
|
||||
import pprint
|
||||
import os
|
||||
|
||||
import torch
|
||||
from cpp_api_parity.utils import TorchNNFunctionalTestParams, TORCH_NN_COMMON_TEST_HARNESS, \
|
||||
compile_cpp_code_inline, set_python_tensors_requires_grad, move_python_tensors_to_device, \
|
||||
add_test, compute_cpp_args_construction_stmts_and_forward_arg_symbols, serialize_arg_dict_as_script_module, \
|
||||
compute_arg_dict, decorate_test_fn, compute_temp_file_path, generate_error_msg, is_torch_nn_functional_test, \
|
||||
try_remove_folder
|
||||
from cpp_api_parity.sample_functional import SAMPLE_FUNCTIONAL_CPP_SOURCE
|
||||
|
||||
# Expected substitutions:
|
||||
#
|
||||
# ${functional_variant_name} (e.g. `BCELoss_no_reduce`)
|
||||
# ${cpp_args_construction_stmts}
|
||||
# ${cpp_function_call}
|
||||
TORCH_NN_FUNCTIONAL_TEST_FORWARD = Template("""
|
||||
void ${functional_variant_name}_test_forward(
|
||||
const std::string& arg_dict_file_path,
|
||||
const std::string& forward_output_file_path) {
|
||||
pybind11::gil_scoped_release no_gil;
|
||||
|
||||
namespace F = torch::nn::functional;
|
||||
|
||||
// Declare arguments
|
||||
auto arg_dict = load_dict_from_file(arg_dict_file_path);
|
||||
${cpp_args_construction_stmts};
|
||||
|
||||
// Some functionals (such as `F::rrelu`) create random tensors in their call path.
|
||||
// To make sure the random tensors created are the same in Python/C++, we need
|
||||
// to set the RNG seed manually.
|
||||
torch::manual_seed(0);
|
||||
|
||||
// Run function with arguments
|
||||
auto cpp_output = ${cpp_function_call};
|
||||
|
||||
// Save the output into a file to be compared in Python later
|
||||
write_ivalue_to_file(torch::IValue(cpp_output), forward_output_file_path);
|
||||
}
|
||||
""")
|
||||
|
||||
def run_forward(unit_test_class, test_params):
|
||||
device = test_params.device
|
||||
|
||||
inputs = set_python_tensors_requires_grad(move_python_tensors_to_device(
|
||||
[arg_value for _, arg_value in test_params.arg_dict['input']], device))
|
||||
inputs += move_python_tensors_to_device(
|
||||
[arg_value for _, arg_value in test_params.arg_dict['target']], device)
|
||||
inputs += move_python_tensors_to_device(
|
||||
[arg_value for _, arg_value in test_params.arg_dict['extra_args']], device)
|
||||
|
||||
# Some functionals (such as `F.rrelu`) create random tensors in their call path.
|
||||
# To make sure the random tensors created are the same in Python/C++, we need
|
||||
# to set the RNG seed manually.
|
||||
torch.manual_seed(0)
|
||||
python_output = test_params.test_instance.constructor()(*inputs)
|
||||
|
||||
return python_output
|
||||
|
||||
def test_forward(unit_test_class, test_params):
|
||||
functional_variant_name = test_params.functional_variant_name
|
||||
cpp_tmp_folder = test_params.cpp_tmp_folder
|
||||
# Remove the temporary folder if it exists already
|
||||
try_remove_folder(cpp_tmp_folder)
|
||||
os.mkdir(cpp_tmp_folder)
|
||||
|
||||
# Run forward on Python functional
|
||||
python_output = run_forward(unit_test_class, test_params)
|
||||
|
||||
# Save Python arguments to be used from C++ function
|
||||
arg_dict_file_path = compute_temp_file_path(cpp_tmp_folder, functional_variant_name, 'arg_dict')
|
||||
serialize_arg_dict_as_script_module(test_params.arg_dict).save(arg_dict_file_path)
|
||||
|
||||
cpp_test_name = '{}_test_forward'.format(test_params.functional_variant_name)
|
||||
cpp_test_fn = getattr(unit_test_class.functional_impl_check_cpp_module, cpp_test_name)
|
||||
|
||||
def run_cpp_test_fn_and_check_output():
|
||||
forward_output_file_path = compute_temp_file_path(cpp_tmp_folder, functional_variant_name, 'forward_output')
|
||||
|
||||
cpp_test_fn(arg_dict_file_path, forward_output_file_path)
|
||||
cpp_output = torch.load(forward_output_file_path)
|
||||
|
||||
# Check that forward outputs are equal
|
||||
unit_test_class.assertEqual(
|
||||
python_output, cpp_output,
|
||||
message=generate_error_msg("forward output", cpp_output, python_output))
|
||||
|
||||
run_cpp_test_fn_and_check_output()
|
||||
|
||||
# Remove temporary folder that stores C++ outputs
|
||||
try_remove_folder(cpp_tmp_folder)
|
||||
|
||||
def compute_functional_name(test_params_dict):
|
||||
def camel_case_to_snake_case(camel_case_str):
|
||||
return re.sub(r'(?<!^)(?=[A-Z])', '_', camel_case_str).lower()
|
||||
|
||||
if 'cpp_options_args' in test_params_dict:
|
||||
# Expected format for `cpp_options_args`: `F::FunctionalFuncOptions(...)`
|
||||
# Example output: `binary_cross_entropy`
|
||||
return camel_case_to_snake_case(
|
||||
test_params_dict['cpp_options_args'].split('(')[0].replace('F::', '').replace('FuncOptions', ''))
|
||||
elif 'cpp_function_call' in test_params_dict:
|
||||
# Expected format for `cpp_function_call`: `F::functional_name(...)`
|
||||
# Example output: `binary_cross_entropy`
|
||||
return test_params_dict['cpp_function_call'].split('(')[0].replace('F::', '')
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{}".format(
|
||||
pprint.pformat(test_params_dict)))
|
||||
|
||||
def compute_cpp_function_call(test_params_dict, arg_dict, functional_name):
|
||||
if 'cpp_function_call' in test_params_dict:
|
||||
return test_params_dict['cpp_function_call']
|
||||
elif 'cpp_options_args' in test_params_dict:
|
||||
cpp_forward_args_symbols = [arg_name for arg_name, _ in
|
||||
arg_dict['input'] + arg_dict['target'] + arg_dict['extra_args']]
|
||||
return 'F::{}({}, {})'.format(
|
||||
functional_name, ", ".join(cpp_forward_args_symbols), test_params_dict['cpp_options_args'])
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{}".format(
|
||||
pprint.pformat(test_params_dict)))
|
||||
|
||||
def process_test_params_for_functional(test_params_dict, device, test_instance_class):
|
||||
test_instance = test_instance_class(**test_params_dict)
|
||||
functional_name = compute_functional_name(test_params_dict)
|
||||
assert test_instance.get_name().startswith('test_')
|
||||
# Example output: `BCELoss_no_reduce_cuda`
|
||||
functional_variant_name = test_instance.get_name()[5:] + (('_' + device) if device != 'cpu' else '')
|
||||
arg_dict = compute_arg_dict(test_params_dict, test_instance)
|
||||
|
||||
return TorchNNFunctionalTestParams(
|
||||
functional_name=functional_name,
|
||||
functional_variant_name=functional_variant_name,
|
||||
test_instance=test_instance,
|
||||
cpp_function_call=compute_cpp_function_call(test_params_dict, arg_dict, functional_name),
|
||||
arg_dict=arg_dict,
|
||||
has_parity=test_params_dict.get('has_parity', True),
|
||||
device=device,
|
||||
cpp_tmp_folder=tempfile.mkdtemp(),
|
||||
)
|
||||
|
||||
def write_test_to_test_class(
|
||||
unit_test_class, test_params_dict, test_instance_class, parity_table, devices):
|
||||
assert is_torch_nn_functional_test(test_params_dict)
|
||||
|
||||
assert 'cpp_options_args' in test_params_dict or 'cpp_function_call' in test_params_dict, (
|
||||
"To enable C++ API parity test, "
|
||||
"`cpp_options_args` or `cpp_function_call` entry must be present in test params dict:\n{}. \n"
|
||||
"If you are interested in adding the C++ API parity test, please see:\n"
|
||||
"NOTE [How to check NN module / functional API parity between Python and C++ frontends]. \n"
|
||||
"If not, please add `test_cpp_api_parity=False` to the test params dict and file an issue about this."
|
||||
).format(pprint.pformat(test_params_dict))
|
||||
|
||||
assert not ('cpp_options_args' in test_params_dict and 'cpp_function_call' in test_params_dict), (
|
||||
"Only one of `cpp_options_args` and `cpp_function_call` entries "
|
||||
"should be present in test params dict:\n{}").format(pprint.pformat(test_params_dict))
|
||||
|
||||
functional_name = compute_functional_name(test_params_dict)
|
||||
|
||||
assert hasattr(torch.nn.functional, functional_name), \
|
||||
"`torch.nn.functional` doesn't have function `{}`. (Discovered while processing\n{}.)".format(
|
||||
functional_name, pprint.pformat(test_params_dict))
|
||||
|
||||
functional_full_name = 'F::' + functional_name
|
||||
|
||||
assert functional_full_name in parity_table['torch::nn::functional'], (
|
||||
"Please add `{}` entry to `torch::nn::functional` section of `test/cpp_api_parity/parity-tracker.md`. "
|
||||
"(Discovered while processing\n{}.)").format(functional_full_name, pprint.pformat(test_params_dict))
|
||||
|
||||
for device in devices:
|
||||
test_params = process_test_params_for_functional(
|
||||
test_params_dict=test_params_dict,
|
||||
device=device,
|
||||
test_instance_class=test_instance_class,
|
||||
)
|
||||
unit_test_name = 'test_torch_nn_functional_{}'.format(test_params.functional_variant_name)
|
||||
unit_test_class.functional_test_params_map[unit_test_name] = test_params
|
||||
|
||||
def test_fn(self):
|
||||
test_forward(
|
||||
unit_test_class=self, test_params=unit_test_class.functional_test_params_map[self._testMethodName])
|
||||
|
||||
test_fn = decorate_test_fn(
|
||||
test_fn=test_fn,
|
||||
test_cuda=test_params_dict.get('test_cuda', True),
|
||||
has_impl_parity=parity_table['torch::nn::functional'][functional_full_name][0] and
|
||||
test_params_dict.get('has_parity', True),
|
||||
device=device)
|
||||
|
||||
add_test(unit_test_class, unit_test_name, test_fn)
|
||||
|
||||
def generate_test_cpp_sources(test_params, template):
|
||||
cpp_args_construction_stmts, _ = compute_cpp_args_construction_stmts_and_forward_arg_symbols(test_params)
|
||||
|
||||
test_cpp_sources = template.substitute(
|
||||
functional_variant_name=test_params.functional_variant_name,
|
||||
cpp_args_construction_stmts=";\n ".join(cpp_args_construction_stmts),
|
||||
cpp_function_call=test_params.cpp_function_call,
|
||||
)
|
||||
return test_cpp_sources
|
||||
|
||||
# Build all C++ tests together, instead of once per test.
|
||||
def build_cpp_tests(unit_test_class, print_cpp_source=False):
|
||||
assert len(unit_test_class.functional_test_params_map) > 0
|
||||
cpp_sources = TORCH_NN_COMMON_TEST_HARNESS + SAMPLE_FUNCTIONAL_CPP_SOURCE
|
||||
functions = []
|
||||
for test_name, test_params in unit_test_class.functional_test_params_map.items():
|
||||
cpp_sources += generate_test_cpp_sources(test_params=test_params, template=TORCH_NN_FUNCTIONAL_TEST_FORWARD)
|
||||
functions.append('{}_test_forward'.format(test_params.functional_variant_name))
|
||||
if print_cpp_source:
|
||||
print(cpp_sources)
|
||||
|
||||
cpp_module = compile_cpp_code_inline(
|
||||
name='functional_impl_check',
|
||||
cpp_sources=cpp_sources,
|
||||
functions=functions)
|
||||
unit_test_class.functional_impl_check_cpp_module = cpp_module
|
test/cpp_api_parity/module_impl_check.py (new file, 298 lines)
@@ -0,0 +1,298 @@
# The purpose of this test is to check that we have implementation parity between
# a Python `torch.nn` module and its corresponding C++ `torch::nn` module. Concretely,
# this test does the following:
#
# 1. Get a test params dict from common_nn.py, run forward and backward on the
# Python module created using the test params.
#
# 2. Serialize the Python module's parameters / buffers and its forward input
# arguments, deserialize them in C++ and load them into the C++ module.
#
# 3. Run the same forward and backward passes on the C++ module, and serialize
# the C++ module's forward output and backward gradients.
#
# 4. Compare Python/C++ module's forward output and backward gradients. If they
# are the same, then we have implementation parity between Python/C++ module.

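# A minimal sketch of what such a test params dict might look like. The values
# below are hypothetical (real dicts live in common_nn.py); only keys that this
# harness actually reads are shown (`module_name`/`fullname`, `constructor_args`,
# `cpp_constructor_args`, `test_cuda`, `has_parity`):
#
#     dict(
#         module_name='Linear',
#         constructor_args=(3, 5),
#         cpp_constructor_args='torch::nn::LinearOptions(3, 5)',
#         test_cuda=True,
#         has_parity=True,
#     )
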
import tempfile
from string import Template
import types
import pprint
import os

import torch
from cpp_api_parity.utils import TorchNNModuleTestParams, TORCH_NN_COMMON_TEST_HARNESS, \
    compile_cpp_code_inline, set_python_tensors_requires_grad, move_python_tensors_to_device, \
    add_test, compute_cpp_args_construction_stmts_and_forward_arg_symbols, serialize_arg_dict_as_script_module, \
    compute_arg_dict, decorate_test_fn, compute_temp_file_path, generate_error_msg, is_torch_nn_functional_test, \
    try_remove_folder
from cpp_api_parity.sample_module import SAMPLE_MODULE_CPP_SOURCE

# Expected substitutions:
#
# ${module_variant_name} (e.g. `Linear_no_bias_cpu`)
# ${module_qualified_name} (e.g. `torch::nn::Linear`)
# ${cpp_args_construction_stmts}
# ${cpp_constructor_args}
# ${device}
# ${cpp_forward_args_symbols}
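#
# For illustration only (hypothetical values, not drawn from an actual test dict),
# one substitution might look roughly like:
#
#   ${module_variant_name}         -> Linear_no_bias_cpu
#   ${module_qualified_name}       -> torch::nn::Linear
#   ${cpp_constructor_args}        -> (torch::nn::LinearOptions(3, 5).bias(false))
#   ${device}                      -> cpu
#   ${cpp_args_construction_stmts} -> statements (produced by
#       `compute_cpp_args_construction_stmts_and_forward_arg_symbols`) that unpack
#       each forward argument from `arg_dict`
#   ${cpp_forward_args_symbols}    -> the comma-separated names of those arguments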
TORCH_NN_MODULE_TEST_FORWARD_BACKWARD = Template("""
void ${module_variant_name}_test_forward_backward(
    const std::string& arg_dict_file_path,
    const std::string& module_file_path,
    const std::string& forward_output_file_path,
    const std::string& backward_grad_dict_file_path) {
  pybind11::gil_scoped_release no_gil;

  // Declare arguments
  auto arg_dict = load_dict_from_file(arg_dict_file_path);
  ${cpp_args_construction_stmts};

  // Construct module and load params/buffers from Python module
  ${module_qualified_name} module${cpp_constructor_args};
  module->to(std::string("${device}"));
  torch::load(module, module_file_path);

  // Some modules (such as `RReLU`) create random tensors in their forward pass.
  // To make sure the random tensors created are the same in Python/C++, we need
  // to set the RNG seed manually.
  torch::manual_seed(0);

  // Forward pass
  auto cpp_output = module(${cpp_forward_args_symbols});

  // Save the output into a file to be compared in Python later
  write_ivalue_to_file(torch::IValue(cpp_output), forward_output_file_path);

  // Backward pass
  cpp_output.sum().backward();

  // Put all gradients into a c10::Dict, save it into a file to be compared in Python later
  c10::Dict<std::string, torch::Tensor> grad_dict;
  for (const auto& param : module->named_parameters()) {
    torch::Tensor grad = param.value().grad();
    if (grad.is_sparse()) {
      grad_dict.insert(param.key() + "_grad_indices", grad.coalesce().indices());
      grad_dict.insert(param.key() + "_grad_values", grad.coalesce().values());
    } else {
      grad_dict.insert(param.key() + "_grad", grad);
    }
  }

  write_ivalue_to_file(torch::IValue(grad_dict), backward_grad_dict_file_path);
}
""")

def run_python_forward_backward(unit_test_class, test_params):
    device = test_params.device
    module = test_params.test_instance.constructor(*test_params.test_instance.constructor_args).to(device)

    inputs = set_python_tensors_requires_grad(move_python_tensors_to_device(
        [arg_value for _, arg_value in test_params.arg_dict['input']], device))
    inputs += move_python_tensors_to_device(
        [arg_value for _, arg_value in test_params.arg_dict['target']], device)
    inputs += move_python_tensors_to_device(
        [arg_value for _, arg_value in test_params.arg_dict['extra_args']], device)

    # Some modules (such as `RReLU`) create random tensors in their forward pass.
    # To make sure the random tensors created are the same in Python/C++, we need
    # to set the RNG seed manually.
    torch.manual_seed(0)

    # Forward pass
    python_output = module(*inputs)

    # NOTE: This is a workaround to allow any module to be traced.
    # We can do this because we are only interested in transferring
    # the Python module's parameters and buffers to the C++ module.
    module.forward = types.MethodType(lambda self, input: input, module)
    script_module = torch.jit.trace(module, torch.tensor(0))

    # Backward pass
    python_output.sum().backward()

    # Put all gradients into a dict, to be compared later
    python_grad_dict = {}
    for name, param in module.named_parameters():
        grad = param.grad
        if grad.is_sparse:
            python_grad_dict[name + "_grad_indices"] = grad.coalesce().indices()
            python_grad_dict[name + "_grad_values"] = grad.coalesce().values()
        else:
            python_grad_dict[name + "_grad"] = grad

    return script_module, python_output, python_grad_dict

def test_forward_backward(unit_test_class, test_params):
    module_variant_name = test_params.module_variant_name
    cpp_tmp_folder = test_params.cpp_tmp_folder
    # Remove the temporary folder if it exists already
    try_remove_folder(cpp_tmp_folder)
    os.mkdir(cpp_tmp_folder)

    # Run forward and backward on Python module
    script_module, python_output, python_grad_dict = run_python_forward_backward(unit_test_class, test_params)

    # Save Python module and arguments to be used from C++ function
    module_file_path = compute_temp_file_path(cpp_tmp_folder, module_variant_name, 'module')
    arg_dict_file_path = compute_temp_file_path(cpp_tmp_folder, module_variant_name, 'arg_dict')
    script_module.save(module_file_path)
    serialize_arg_dict_as_script_module(test_params.arg_dict).save(arg_dict_file_path)

    cpp_test_name = '{}_test_forward_backward'.format(test_params.module_variant_name)
    cpp_test_fn = getattr(unit_test_class.module_impl_check_cpp_module, cpp_test_name)

    def run_cpp_test_fn_and_check_output():
        forward_output_file_path = compute_temp_file_path(cpp_tmp_folder, module_variant_name, 'forward_output')
        backward_grad_dict_file_path = compute_temp_file_path(cpp_tmp_folder, module_variant_name, 'backward_grad_dict')

        cpp_test_fn(arg_dict_file_path, module_file_path, forward_output_file_path, backward_grad_dict_file_path)
        cpp_output = torch.load(forward_output_file_path)
        cpp_grad_dict = torch.load(backward_grad_dict_file_path)

        # Check that forward outputs are equal
        unit_test_class.assertEqual(python_output, cpp_output,
                                    message=generate_error_msg("forward output", cpp_output, python_output))

        # Check that module parameter gradients are equal after backward pass
        unit_test_class.assertEqual(
            len(python_grad_dict), len(cpp_grad_dict),
            message=generate_error_msg("# of parameters", len(cpp_grad_dict), len(python_grad_dict)))
        for key in python_grad_dict:
            param_name = None
            for suffix in ['_grad', '_grad_indices', '_grad_values']:
                if key.endswith(suffix):
                    param_name = key[:-len(suffix)]
                    break
            assert param_name is not None
            sparsity_str = 'sparse' if key.endswith('_grad_indices') or key.endswith('_grad_values') else 'dense'

            unit_test_class.assertTrue(
                key in cpp_grad_dict,
                msg=generate_error_msg(
                    "\"Does module have a parameter named `{}` with {} gradient?\"".format(param_name, sparsity_str),
                    False, True))
            unit_test_class.assertEqual(
                python_grad_dict[key], cpp_grad_dict[key],
                message=generate_error_msg(
                    "`{}`'s {} gradient (`{}`)".format(param_name, sparsity_str, key),
                    cpp_grad_dict[key], python_grad_dict[key]))

    run_cpp_test_fn_and_check_output()

    # Remove temporary folder that stores C++ outputs
    try_remove_folder(cpp_tmp_folder)

def compute_module_name(test_params_dict):
    fullname = test_params_dict.get('fullname', None)
    if fullname:
        module_name = fullname.split('_')[0]
    else:
        module_name = test_params_dict.get('module_name')
    return module_name

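# For example (illustrative values): a test params dict with
# `fullname='BCELoss_weights'` yields `module_name='BCELoss'`, while one that
# only sets `module_name='Linear'` yields `'Linear'` unchanged.
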
def process_test_params_for_module(test_params_dict, device, test_instance_class):
    module_name = compute_module_name(test_params_dict)
    test_params_dict['constructor'] = test_params_dict.get('constructor', getattr(torch.nn, module_name))
    test_instance = test_instance_class(**test_params_dict)
    assert test_instance.get_name().startswith('test_')
    # Example output: `BCELoss_weights_cuda`
    module_variant_name = test_instance.get_name()[5:] + (('_' + device) if device != 'cpu' else '')

    if 'constructor_args' in test_params_dict:
        assert 'cpp_constructor_args' in test_params_dict, (
            "If `constructor_args` is present in test params dict, to enable C++ API parity test, "
            "`cpp_constructor_args` must be present in:\n{}\n"
            "If you are interested in adding the C++ API parity test, please see:\n"
            "NOTE [How to check NN module / functional API parity between Python and C++ frontends]. \n"
            "If not, please add `test_cpp_api_parity=False` to the test params dict and file an issue about this."
        ).format(pprint.pformat(test_params_dict))

    return TorchNNModuleTestParams(
        module_name=module_name,
        module_variant_name=module_variant_name,
        test_instance=test_instance,
        cpp_constructor_args=test_params_dict.get('cpp_constructor_args', ''),
        arg_dict=compute_arg_dict(test_params_dict, test_instance),
        has_parity=test_params_dict.get('has_parity', True),
        device=device,
        cpp_tmp_folder=tempfile.mkdtemp(),
    )

def write_test_to_test_class(
        unit_test_class, test_params_dict, test_instance_class, parity_table, devices):
    assert not is_torch_nn_functional_test(test_params_dict)

    module_name = compute_module_name(test_params_dict)

    assert hasattr(torch.nn, module_name), (
        "`torch.nn` doesn't have module `{}`. "
        "If you are adding a new test, please set `fullname` using format `ModuleName_desc` "
        "or set `module_name` using format `ModuleName` in the module test dict:\n{}"
    ).format(module_name, pprint.pformat(test_params_dict))

    module_full_name = 'torch::nn::' + module_name

    assert module_full_name in parity_table['torch::nn'], (
        "Please add `{}` entry to `torch::nn` section of `test/cpp_api_parity/parity-tracker.md`. "
        "(Discovered while processing\n{}.)").format(module_full_name, pprint.pformat(test_params_dict))

    for device in devices:
        test_params = process_test_params_for_module(
            test_params_dict=test_params_dict,
            device=device,
            test_instance_class=test_instance_class,
        )
        unit_test_name = 'test_torch_nn_{}'.format(test_params.module_variant_name)
        unit_test_class.module_test_params_map[unit_test_name] = test_params

        def test_fn(self):
            test_forward_backward(
                unit_test_class=self, test_params=unit_test_class.module_test_params_map[self._testMethodName])

        test_fn = decorate_test_fn(
            test_fn=test_fn,
            test_cuda=test_params_dict.get('test_cuda', True),
            has_impl_parity=parity_table['torch::nn'][module_full_name][0] and
                test_params_dict.get('has_parity', True),
            device=device)

        add_test(unit_test_class, unit_test_name, test_fn)

def generate_test_cpp_sources(test_params, template):
    device = test_params.device

    cpp_constructor_args = test_params.cpp_constructor_args
    if cpp_constructor_args != '':
        cpp_constructor_args = '({})'.format(cpp_constructor_args)

    cpp_args_construction_stmts, cpp_forward_args_symbols = \
        compute_cpp_args_construction_stmts_and_forward_arg_symbols(test_params)

    test_cpp_sources = template.substitute(
        module_variant_name=test_params.module_variant_name,
        module_qualified_name='torch::nn::{}'.format(test_params.module_name),
        cpp_args_construction_stmts=";\n  ".join(cpp_args_construction_stmts),
        cpp_constructor_args=cpp_constructor_args,
        cpp_forward_args_symbols=", ".join(cpp_forward_args_symbols),
        device=device,
    )
    return test_cpp_sources

# Build all C++ tests together, instead of once per test.
def build_cpp_tests(unit_test_class, print_cpp_source=False):
    assert len(unit_test_class.module_test_params_map) > 0
    cpp_sources = TORCH_NN_COMMON_TEST_HARNESS + SAMPLE_MODULE_CPP_SOURCE
    functions = []
    for test_name, test_params in unit_test_class.module_test_params_map.items():
        cpp_sources += generate_test_cpp_sources(
            test_params=test_params, template=TORCH_NN_MODULE_TEST_FORWARD_BACKWARD)
        functions.append('{}_test_forward_backward'.format(test_params.module_variant_name))
    if print_cpp_source:
        print(cpp_sources)

    cpp_module = compile_cpp_code_inline(
        name='module_impl_check',
        cpp_sources=cpp_sources,
        functions=functions)
    unit_test_class.module_impl_check_cpp_module = cpp_module
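
# A rough sketch of how this harness is typically driven by a parity test file.
# The names used here (`TestCppApiParity`, `common_nn.module_tests`,
# `common_nn.ModuleTest`, and the pre-parsed `parity_table`) are assumptions for
# illustration; they are not defined in this module:
#
#     for test_params_dict in common_nn.module_tests:
#         if test_params_dict.get('test_cpp_api_parity', True):
#             write_test_to_test_class(
#                 unit_test_class=TestCppApiParity,
#                 test_params_dict=test_params_dict,
#                 test_instance_class=common_nn.ModuleTest,
#                 parity_table=parity_table,
#                 devices=['cpu', 'cuda'])
#     build_cpp_tests(TestCppApiParity, print_cpp_source=False)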