mirror of https://github.com/pytorch/pytorch.git
synced 2025-11-16 07:24:54 +08:00

Update (base update)
[ghstack-poisoned]
This commit is contained in:
@@ -203,7 +203,9 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
 fi

 if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
-  export USE_CUDA=0
+  if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
+    export USE_CUDA=1
+  fi
   export USE_ASAN=1
   export REL_WITH_DEB_INFO=1
   export UBSAN_FLAGS="-fno-sanitize-recover=all"
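Net effect of this hunk: ASAN builds no longer force USE_CUDA=0, so CUDA jobs now build with CUDA enabled under ASAN. A minimal sketch of how the new branch resolves (the BUILD_ENVIRONMENT value below is hypothetical):

    # Hypothetical job name matching both *-clang*-asan* and *cuda*.
    BUILD_ENVIRONMENT=linux-jammy-cuda12.4-clang12-asan
    if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
      if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
        export USE_CUDA=1   # the old code exported USE_CUDA=0 here unconditionally
      fi
      export USE_ASAN=1
    fi
    echo "USE_CUDA=${USE_CUDA:-unset}"   # -> USE_CUDA=1 for this job name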
@@ -196,6 +196,9 @@ install_tlparse
 # ASAN test is not working
 if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
   export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=true:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0:check_initialization_order=true:debug=true
+  if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
+    export ASAN_OPTIONS="${ASAN_OPTIONS}:protect_shadow_gap=0"
+  fi
   export UBSAN_OPTIONS=print_stacktrace=1:suppressions=$PWD/ubsan.supp
   export PYTORCH_TEST_WITH_ASAN=1
   export PYTORCH_TEST_WITH_UBSAN=1
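protect_shadow_gap=0 is the usual workaround for combining ASan with CUDA: the driver maps device memory at fixed addresses that can land inside the shadow gap ASan normally protects, so on CUDA jobs the gap is left unprotected. To inspect an option string like this one, ASan's help flag can be appended when running any instrumented binary (the binary name below is a placeholder):

    # help=1 makes the ASan runtime print the flags it understands at startup.
    ASAN_OPTIONS="${ASAN_OPTIONS}:help=1" ./any_asan_instrumented_binary 2>&1 | grep protect_shadow_gap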
@@ -320,6 +323,7 @@ test_inductor_distributed() {
   python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
   python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
+  python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
   python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
@@ -331,11 +335,12 @@ test_inductor_distributed() {
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose
+  python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose
   python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose

   # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported
   # with if required # gpus aren't available
-  python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives --verbose
+  python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_compute_comm_reordering --verbose
   assert_git_not_dirty
 }
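As the hunks above show, run_test.py selects test modules by path via -i/--include and forwards -k to the underlying pytest-style name filter, so single cases can be targeted. A sketch (the -k expression is illustrative):

    # Run only matmul-related cases from the new micro-pipeline TP suite.
    python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py -k matmul --verbose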
14  .github/actions/upload-test-artifacts/action.yml  (vendored)
@@ -28,7 +28,7 @@ runs:
       run: |
         # Remove any previous test jsons if they exist
         rm -f test-jsons-*.zip
-        zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'
+        zip -r "test-jsons-${FILE_SUFFIX}.zip" test/test-reports -i '*.json'

   - name: Zip test reports for upload
     if: runner.os != 'Windows' && !inputs.use-gha
@@ -38,7 +38,7 @@ runs:
       run: |
         # Remove any previous test reports if they exist
         rm -f test-reports-*.zip
-        zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml' -i '*.csv'
+        zip -r "test-reports-${FILE_SUFFIX}.zip" test/test-reports -i '*.xml' -i '*.csv'

   - name: Zip usage log for upload
     if: runner.os != 'Windows' && !inputs.use-gha
@@ -53,8 +53,8 @@ runs:
         if [ -f 'usage_log.txt' ]; then
           zip "logs-${FILE_SUFFIX}.zip" 'usage_log.txt'
         fi
-        if ls test/**/*.log 1> /dev/null 2>&1; then
-          zip -r "logs-${FILE_SUFFIX}.zip" test -i '*.log'
+        if find "test/test-reports" -name "*.log" 2>/dev/null | grep -q .; then
+          zip -r "logs-${FILE_SUFFIX}.zip" test/test-reports -i '*.log'
         fi

   - name: Zip debugging artifacts for upload
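All three unix steps rely on zip's -i include filter: zip recurses from the named directory but only stores paths matching the pattern, which is why narrowing the root from test to test/test-reports shrinks the archives. A quick illustration with throwaway files (names invented):

    mkdir -p test/test-reports
    touch test/test-reports/results.xml test/test-reports/scratch.txt
    zip -r demo.zip test/test-reports -i '*.xml'   # stores results.xml, skips scratch.txt
    unzip -l demo.zip                              # confirm only the .xml was archived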
@@ -77,7 +77,7 @@ runs:
       FILE_SUFFIX: ${{ inputs.file-suffix }}
     run: |
       # -ir => recursive include all files in pattern
-      7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'
+      7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\test-reports\*.json'

   - name: Zip test reports for upload
     if: runner.os == 'Windows' && !inputs.use-gha
@@ -86,7 +86,7 @@ runs:
       FILE_SUFFIX: ${{ inputs.file-suffix }}
     run: |
       # -ir => recursive include all files in pattern
-      7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml' -ir'!test\*.csv'
+      7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\test-reports\*.xml' -ir'!test\test-reports\*.csv'

   - name: Zip usage log for upload
     if: runner.os == 'Windows' && !inputs.use-gha
@@ -96,7 +96,7 @@ runs:
       FILE_SUFFIX: ${{ inputs.file-suffix }}
     run: |
       # -ir => recursive include all files in pattern
-      7z a "logs-$Env:FILE_SUFFIX.zip" 'usage_log.txt' -ir'!test\*.log'
+      7z a "logs-$Env:FILE_SUFFIX.zip" 'usage_log.txt' -ir'!test\test-reports\*.log'

   # S3 upload
   - name: Store Test Downloaded JSONs on S3
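On the Windows runners, 7-Zip's -ir switch is the recursive analogue of zip's -i: the pattern after ! is matched while walking subdirectories, so the same narrowing to test\test-reports applies. The filter can be checked outside the workflow (archive name invented):

    7z a demo.zip -ir'!test\test-reports\*.log'   # recursively add only .log files under test\test-reports
    7z l demo.zip                                 # list the archive to confirm what matched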
@@ -459,7 +459,7 @@ def generate_wheels_matrix(
                     ".", "_"
                 ),
                 "pytorch_extra_install_requirements": (
-                    PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"]
+                    PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.4"]
                     if os != "linux" and gpu_arch_type != "xpu"
                     else ""
                 ),
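Each entry in PYTORCH_EXTRA_INSTALL_REQUIREMENTS carries a PEP 508 environment marker, so the pinned CUDA 12.4 wheels are only resolved on Linux x86_64 and are skipped everywhere else. One entry, passed directly to pip, shows the mechanism:

    # On Linux x86_64 this installs the wheel; on macOS or aarch64 pip reports
    # that the marker does not match the environment and installs nothing.
    pip install "nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'"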
2  .github/workflows/docker-builds.yml  (vendored)
@@ -123,7 +123,7 @@ jobs:
           IMAGE_NAME: ${{ matrix.docker-image-name }}
         with:
           shell: bash
-          timeout_minutes: 15
+          timeout_minutes: 30
           max_attempts: 5
           retry_wait_seconds: 90
           command: |
8  .github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml  (generated, vendored)
@@ -65,7 +65,7 @@ jobs:
       ALPINE_IMAGE: "arm64v8/alpine"
       build_name: manywheel-py3_9-cpu-aarch64
       build_environment: linux-aarch64-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_9-cpu-aarch64-test: # Testing
@@ -185,7 +185,7 @@ jobs:
       ALPINE_IMAGE: "arm64v8/alpine"
       build_name: manywheel-py3_10-cpu-aarch64
       build_environment: linux-aarch64-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_10-cpu-aarch64-test: # Testing
@@ -305,7 +305,7 @@ jobs:
       ALPINE_IMAGE: "arm64v8/alpine"
       build_name: manywheel-py3_11-cpu-aarch64
       build_environment: linux-aarch64-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_11-cpu-aarch64-test: # Testing
@@ -425,7 +425,7 @@ jobs:
       ALPINE_IMAGE: "arm64v8/alpine"
       build_name: manywheel-py3_12-cpu-aarch64
       build_environment: linux-aarch64-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_12-cpu-aarch64-test: # Testing
10  .github/workflows/generated-linux-s390x-binary-manywheel-nightly.yml  (generated, vendored)
@@ -64,7 +64,7 @@ jobs:
       ALPINE_IMAGE: "docker.io/s390x/alpine"
       build_name: manywheel-py3_9-cpu-s390x
       build_environment: linux-s390x-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_9-cpu-s390x-test: # Testing
@@ -133,7 +133,7 @@ jobs:
       ALPINE_IMAGE: "docker.io/s390x/alpine"
       build_name: manywheel-py3_10-cpu-s390x
       build_environment: linux-s390x-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_10-cpu-s390x-test: # Testing
@@ -202,7 +202,7 @@ jobs:
       ALPINE_IMAGE: "docker.io/s390x/alpine"
       build_name: manywheel-py3_11-cpu-s390x
       build_environment: linux-s390x-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_11-cpu-s390x-test: # Testing
@@ -271,7 +271,7 @@ jobs:
       ALPINE_IMAGE: "docker.io/s390x/alpine"
       build_name: manywheel-py3_12-cpu-s390x
       build_environment: linux-s390x-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_12-cpu-s390x-test: # Testing
@@ -340,7 +340,7 @@ jobs:
       ALPINE_IMAGE: "docker.io/s390x/alpine"
       build_name: manywheel-py3_13-cpu-s390x
       build_environment: linux-s390x-binary-manywheel
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     secrets:
       github-token: ${{ secrets.GITHUB_TOKEN }}
   manywheel-py3_13-cpu-s390x-test: # Testing
10  .github/workflows/generated-macos-arm64-binary-wheel-nightly.yml  (generated, vendored)
@@ -46,7 +46,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       SKIP_ALL_TESTS: 1
       DESIRED_PYTHON: "3.9"
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     steps:
       # NOTE: These environment variables are put here so that they can be applied on every job equally
       # They are also here because setting them at a workflow level doesn't give us access to the
@@ -162,7 +162,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       SKIP_ALL_TESTS: 1
       DESIRED_PYTHON: "3.10"
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     steps:
       # NOTE: These environment variables are put here so that they can be applied on every job equally
       # They are also here because setting them at a workflow level doesn't give us access to the
@@ -278,7 +278,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       SKIP_ALL_TESTS: 1
       DESIRED_PYTHON: "3.11"
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     steps:
       # NOTE: These environment variables are put here so that they can be applied on every job equally
       # They are also here because setting them at a workflow level doesn't give us access to the
@@ -394,7 +394,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       SKIP_ALL_TESTS: 1
       DESIRED_PYTHON: "3.12"
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     steps:
       # NOTE: These environment variables are put here so that they can be applied on every job equally
       # They are also here because setting them at a workflow level doesn't give us access to the
@@ -510,7 +510,7 @@ jobs:
       GPU_ARCH_TYPE: cpu
       SKIP_ALL_TESTS: 1
       DESIRED_PYTHON: "3.13"
-      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
+      PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
     steps:
       # NOTE: These environment variables are put here so that they can be applied on every job equally
       # They are also here because setting them at a workflow level doesn't give us access to the
32  .github/workflows/generated-windows-binary-wheel-nightly.yml  (generated, vendored)
@ -55,7 +55,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -322,7 +322,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -591,7 +591,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -860,7 +860,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.9"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -1393,7 +1393,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cpu
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
steps:
|
||||
- name: Display EC2 information
|
||||
shell: bash
|
||||
@ -1660,7 +1660,7 @@ jobs:
|
||||
GPU_ARCH_TYPE: cuda
|
||||
SKIP_ALL_TESTS: 1
|
||||
DESIRED_PYTHON: "3.10"
|
||||
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -1929,7 +1929,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -2198,7 +2198,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.10"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -2731,7 +2731,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -2998,7 +2998,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -3267,7 +3267,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -3536,7 +3536,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.11"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -4069,7 +4069,7 @@ jobs:
GPU_ARCH_TYPE: cpu
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -4336,7 +4336,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -4605,7 +4605,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
@ -4874,7 +4874,7 @@ jobs:
GPU_ARCH_TYPE: cuda
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.12"
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'
steps:
- name: Display EC2 information
shell: bash
29
.github/workflows/periodic.yml
vendored
@ -385,3 +385,32 @@ jobs:
build-environment: linux-focal-cuda11.8-py3.9-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_9-gcc9-experimental-split-build.outputs.test-matrix }}

linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build:
name: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
use_split_build: true
build-environment: linux-focal-cuda11.8-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
cuda-arch-list: '7.5'
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
]}

linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build-test:
name: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build-test
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
7
.github/workflows/pull.yml
vendored
@ -280,11 +280,12 @@ jobs:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-focal-cuda11.8-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
cuda-arch-list: '7.5'
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
]}
secrets: inherit
28
.github/workflows/trunk.yml
vendored
@ -288,31 +288,3 @@ jobs:
build-environment: linux-focal-cuda12.4-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}

linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build:
name: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
use_split_build: true
build-environment: linux-focal-cuda11.8-py3.10-gcc9
docker-image-name: pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9
test-matrix: |
{ include: [
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.8xlarge.nvidia.gpu" },
]}

linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build-test:
name: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build-test
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build
docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }}
@ -241,7 +241,9 @@ exclude_patterns = [
'c10/util/*inl.h',
'c10/test/**/*.h',
'third_party/**/*',
'torch/csrc/api/**',
'torch/csrc/api/include/torch/nn/modules/common.h',
'torch/csrc/api/include/torch/linalg.h',
'torch/csrc/api/include/torch/nn/pimpl-inl.h',
'torch/csrc/autograd/generated/**',
'torch/csrc/distributed/**/*',
'torch/csrc/dynamo/eval_frame.h',

@ -1230,87 +1232,6 @@ exclude_patterns = [
'torch/fft/__init__.py',
'torch/func/__init__.py',
'torch/futures/__init__.py',
'torch/fx/__init__.py',
'torch/fx/_compatibility.py',
'torch/fx/_symbolic_trace.py',
'torch/fx/annotate.py',
'torch/fx/config.py',
'torch/fx/experimental/__init__.py',
'torch/fx/experimental/accelerator_partitioner.py',
'torch/fx/experimental/const_fold.py',
'torch/fx/experimental/debug.py',
'torch/fx/experimental/graph_gradual_typechecker.py',
'torch/fx/experimental/merge_matmul.py',
'torch/fx/experimental/meta_tracer.py',
'torch/fx/experimental/migrate_gradual_types/__init__.py',
'torch/fx/experimental/migrate_gradual_types/constraint.py',
'torch/fx/experimental/migrate_gradual_types/constraint_generator.py',
'torch/fx/experimental/migrate_gradual_types/constraint_transformation.py',
'torch/fx/experimental/migrate_gradual_types/operation.py',
'torch/fx/experimental/migrate_gradual_types/transform_to_z3.py',
'torch/fx/experimental/migrate_gradual_types/util.py',
'torch/fx/experimental/migrate_gradual_types/z3_types.py',
'torch/fx/experimental/normalize.py',
'torch/fx/experimental/optimization.py',
'torch/fx/experimental/partitioner_utils.py',
'torch/fx/experimental/refinement_types.py',
'torch/fx/experimental/rewriter.py',
'torch/fx/experimental/schema_type_annotation.py',
'torch/fx/experimental/unification/__init__.py',
'torch/fx/experimental/unification/core.py',
'torch/fx/experimental/unification/dispatch.py',
'torch/fx/experimental/unification/match.py',
'torch/fx/experimental/unification/more.py',
'torch/fx/experimental/unification/multipledispatch/__init__.py',
'torch/fx/experimental/unification/multipledispatch/conflict.py',
'torch/fx/experimental/unification/multipledispatch/core.py',
'torch/fx/experimental/unification/multipledispatch/dispatcher.py',
'torch/fx/experimental/unification/multipledispatch/utils.py',
'torch/fx/experimental/unification/multipledispatch/variadic.py',
'torch/fx/experimental/unification/unification_tools.py',
'torch/fx/experimental/unification/utils.py',
'torch/fx/experimental/unification/variable.py',
'torch/fx/experimental/unify_refinements.py',
'torch/fx/graph.py',
'torch/fx/graph_module.py',
'torch/fx/interpreter.py',
'torch/fx/node.py',
'torch/fx/operator_schemas.py',
'torch/fx/passes/__init__.py',
'torch/fx/passes/annotate_getitem_nodes.py',
'torch/fx/passes/backends/__init__.py',
'torch/fx/passes/backends/cudagraphs.py',
'torch/fx/passes/dialect/__init__.py',
'torch/fx/passes/dialect/common/__init__.py',
'torch/fx/passes/dialect/common/cse_pass.py',
'torch/fx/passes/fake_tensor_prop.py',
'torch/fx/passes/graph_drawer.py',
'torch/fx/passes/graph_manipulation.py',
'torch/fx/passes/infra/__init__.py',
'torch/fx/passes/infra/partitioner.py',
'torch/fx/passes/infra/pass_base.py',
'torch/fx/passes/infra/pass_manager.py',
'torch/fx/passes/net_min_base.py',
'torch/fx/passes/operator_support.py',
'torch/fx/passes/param_fetch.py',
'torch/fx/passes/pass_manager.py',
'torch/fx/passes/reinplace.py',
'torch/fx/passes/shape_prop.py',
'torch/fx/passes/split_module.py',
'torch/fx/passes/split_utils.py',
'torch/fx/passes/splitter_base.py',
'torch/fx/passes/tests/__init__.py',
'torch/fx/passes/tests/test_pass_manager.py',
'torch/fx/passes/tools_common.py',
'torch/fx/passes/utils/__init__.py',
'torch/fx/passes/utils/common.py',
'torch/fx/passes/utils/fuser_utils.py',
'torch/fx/passes/utils/matcher_utils.py',
'torch/fx/passes/utils/source_matcher_utils.py',
'torch/fx/proxy.py',
'torch/fx/subgraph_rewriter.py',
'torch/fx/tensor_type.py',
'torch/fx/traceback.py',
'torch/linalg/__init__.py',
'torch/monitor/__init__.py',
'torch/nested/__init__.py',
@ -136,7 +136,7 @@ inline bool _apply_preamble(ArrayRef<Tensor> tensors) {
checkDeviceType("CPU_tensor_apply", tensors, kCPU);
checkLayout("CPU_tensor_apply", tensors, kStrided);
if (!_all_equal_numel(tensors))
AT_ERROR(_all_equal_numel_error(tensors));
TORCH_CHECK(false, _all_equal_numel_error(tensors));
// An empty tensor has no elements
for (auto& t : tensors)
if (t.numel() == 0)
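The hunk above is the first instance of the tree-wide migration this commit applies: AT_ERROR(msg) was an unconditional throw, and TORCH_CHECK(false, msg) expresses the same failure through the one canonical checking macro. A minimal sketch of the idiom (the two helper functions below are hypothetical, not part of this diff):

#include <c10/util/Exception.h>
#include <cstdint>

void check_positive(int64_t n) {
  // Conditional form: throws c10::Error with the message when the predicate fails.
  TORCH_CHECK(n > 0, "expected a positive size, but got ", n);
}

void unreachable_branch() {
  // Unconditional form, the drop-in replacement for AT_ERROR(...).
  TORCH_CHECK(false, "this code path should never be reached");
}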
@ -12,11 +12,11 @@
namespace at {

static cpu_fixed_malloc(void*, ptrdiff_t) {
AT_ERROR("attempting to resize a tensor view of an external blob");
TORCH_CHECK(false, "attempting to resize a tensor view of an external blob");
}

static cpu_fixed_realloc(void*, void*, ptrdiff_t) {
AT_ERROR("attempting to resize a tensor view of an external blob");
TORCH_CHECK(false, "attempting to resize a tensor view of an external blob");
}

static cpu_fixed_free(void* state, void* allocation) {
@ -189,7 +189,7 @@ void CPUGeneratorImpl::set_state(const c10::TensorImpl& new_state) {
double_normal_sample = std::optional<double>(legacy_pod->normal_y);
}
} else {
AT_ERROR("Expected either a CPUGeneratorImplStateLegacy of size ", size_legacy,
TORCH_CHECK(false, "Expected either a CPUGeneratorImplStateLegacy of size ", size_legacy,
" or a CPUGeneratorImplState of size ", size_current,
" but found the input RNG state size to be ", new_state_size);
}
@ -43,19 +43,9 @@ class TORCH_API Context {

if (device_type == at::kCPU) {
return at::detail::getDefaultCPUGenerator();
} else if (device_type == at::kCUDA) {
return at::detail::getCUDAHooks().getDefaultCUDAGenerator(device.index());
} else if (device_type == at::kMPS) {
return at::detail::getMPSHooks().getDefaultMPSGenerator();
} else if (device_type == at::kXPU) {
return at::detail::getXPUHooks().getDefaultXPUGenerator(device.index());
} else if (device_type == at::kIPU) {
return at::detail::getIPUHooks().getDefaultIPUGenerator(device.index());
} else if (device_type == at::kPrivateUse1) {
return at::detail::getPrivateUse1Hooks().getDefaultGenerator(
device.index());
} else {
AT_ERROR(c10::DeviceTypeName(device_type), " device type not enabled.");
return getAcceleratorHooksInterface(device_type)
.getDefaultGenerator(device.index());
}
}
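Beyond the error-macro swap, this hunk also collapses the per-backend if/else chain into a single dispatch through the accelerator hooks interface. A rough sketch of that shape, assuming a registry keyed by device type (every name below is hypothetical, not the actual ATen API):

#include <map>
#include <stdexcept>

struct HooksInterface {
  virtual ~HooksInterface() = default;
  virtual int getDefaultGenerator(int device_index) const = 0;
};

// One registered interface per backend, instead of one branch per backend.
std::map<int, const HooksInterface*>& hooksRegistry() {
  static std::map<int, const HooksInterface*> r;
  return r;
}

const HooksInterface& getHooksFor(int device_type) {
  auto it = hooksRegistry().find(device_type);
  if (it == hooksRegistry().end())
    throw std::runtime_error("device type not enabled");
  return *it->second;
}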
@ -77,8 +67,10 @@ class TORCH_API Context {
} else if (device_type == at::kHIP) {
return at::detail::getHIPHooks();
} else {
AT_ERROR(
c10::DeviceTypeName(device_type), " device type not an accelerator.");
TORCH_CHECK(
false,
c10::DeviceTypeName(device_type),
" device type not an accelerator.");
}
}
@ -349,18 +341,28 @@ class TORCH_API Context {

// Preserved for BC
void lazyInitCUDA() {
TORCH_WARN_DEPRECATION(
"lazyInitCUDA is deprecated. Please use lazyInitDevice(at::kCUDA) instead.")
lazyInitDevice(at::kCUDA);
}
void lazyInitHIP() {
TORCH_WARN_DEPRECATION(
"lazyInitHIP is deprecated. Please use lazyInitDevice(at::kHIP) instead.")
lazyInitDevice(at::kHIP);
}
void lazyInitXPU() {
TORCH_WARN_DEPRECATION(
"lazyInitXPU is deprecated. Please use lazyInitDevice(at::kXPU) instead.")
lazyInitDevice(at::kXPU);
}
void lazyInitMTIA() {
TORCH_WARN_DEPRECATION(
"lazyInitMTIA is deprecated. Please use lazyInitDevice(at::kMTIA) instead.")
lazyInitDevice(at::kMTIA);
}
void lazyInitPrivateUse1() {
TORCH_WARN_DEPRECATION(
"lazyInitPrivateUse1 is deprecated. Please use lazyInitDevice(at::kPrivateUse1) instead.")
lazyInitDevice(at::kPrivateUse1);
}
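These lazyInit shims all follow the same backward-compatibility pattern: keep the old entry point, warn at runtime, and forward to the new generic API. A compact sketch of the pattern (names here are hypothetical):

#include <iostream>

void lazyInitDevice(int device_type) {
  // new generic initialization path
}

void lazyInitLegacy() {
  // old-style entry point kept for BC: warn, then forward
  std::cerr << "lazyInitLegacy is deprecated; use lazyInitDevice instead\n";
  lazyInitDevice(/*device_type=*/0);
}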
@ -13,10 +13,12 @@ namespace at {
TORCH_API ScalarType toScalarType(const DLDataType& dtype);
TORCH_API DLManagedTensor* toDLPack(const Tensor& src);
TORCH_API Tensor fromDLPack(DLManagedTensor* src);
C10_DEPRECATED_MESSAGE("Please migrate to a non-const variant")
inline Tensor fromDLPack(const DLManagedTensor* src) {

[[deprecated("Please migrate to a non-const variant")]] inline Tensor fromDLPack(
const DLManagedTensor* src) {
return fromDLPack(const_cast<DLManagedTensor*>(src));
}

TORCH_API Tensor
fromDLPack(DLManagedTensor* src, std::function<void(void*)> deleter);
TORCH_API DLDataType getDLDataType(const Tensor& t);
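This file shows the other recurring migration in the commit: the C10_DEPRECATED_MESSAGE macro gives way to the standard C++14 [[deprecated]] attribute, which every supported compiler now understands. A self-contained sketch (old_name/new_name are hypothetical):

inline int new_name() { return 42; }

[[deprecated("Please call new_name() instead")]]
inline int old_name() {
  // forwards to the replacement so existing callers keep working
  return new_name();
}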
@ -55,7 +55,8 @@ TORCH_API void record_kernel_function_dtype(std::string name);
do { \
if constexpr (!at::should_include_kernel_dtype( \
at_dispatch_name, enum_type)) { \
AT_ERROR( \
TORCH_CHECK( \
false, \
"dtype '", \
toString(enum_type), \
"' not selected for kernel tag ", \
@ -103,23 +104,23 @@ inline at::ScalarType scalar_type(at::ScalarType s) {
return s;
}

C10_DEPRECATED_MESSAGE(
[[deprecated(
"passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, "
"pass an at::ScalarType instead")
inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) {
"pass an at::ScalarType instead")]] inline at::ScalarType
scalar_type(const at::DeprecatedTypeProperties& t) {
return t.scalarType();
}

C10_DEPRECATED_MESSAGE(
[[deprecated(
"AT_DISPATCH_ALL_TYPES_AND_HALF is deprecated, "
"use AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, ...) instead")
inline void deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF() {}
"use AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, ...) instead")]] inline void
deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF() {}

C10_DEPRECATED_MESSAGE(
[[deprecated(
"AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX is deprecated, "
"use AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(at::ScalarType::Half, ...) "
"instead")
inline void deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX() {}
"instead")]] inline void
deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX() {}

} // namespace detail
@ -220,7 +221,8 @@ inline void deprecated_AT_DISPATCH_ALL_TYPES_AND_HALF_AND_COMPLEX() {}
switch (_st) { \
__VA_ARGS__ \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
'"', \
at_dispatch_name, \
"\" not implemented for '", \
@ -78,7 +78,7 @@ inline void check_defined(
const char* api_name) {
for (auto& t : tensors) {
if (!t.get().defined()) {
AT_ERROR(api_name, "(...) called with an undefined Tensor");
TORCH_CHECK(false, api_name, "(...) called with an undefined Tensor");
}
}
}
@ -231,6 +231,7 @@ Tensor FunctionalInverses::slice_Tensor_inverse(const Tensor& base, const Tensor
}
}

// NOLINTNEXTLINE(performance-unnecessary-value-param)
Tensor FunctionalInverses::split_Tensor_inverse(const Tensor& base, const Tensor& mutated_view, InverseReturnMode inverse_return_mode, int64_t mutated_view_idx, c10::SymInt split_size, int64_t dim) {
// It would be nice if this logic could be re-used from autograd's split_backward(), but I don't think it can.
// For functionalization, we have only have one of the tensors from the TensorList outputed by split(), and we want to layer i

@ -452,6 +453,7 @@ Tensor FunctionalInverses::chunk_inverse(const at::Tensor & base, const at::Tens
return split_with_sizes_inverse(base, mutated_view, inverse_return_mode, mutated_view_idx, split_sizes, dim);
}

// NOLINTNEXTLINE(performance-unnecessary-value-param)
Tensor FunctionalInverses::narrow_inverse(const at::Tensor & base, const at::Tensor & mutated_view, InverseReturnMode inverse_return_mode, int dim, c10::SymInt start, c10::SymInt length) {
if (inverse_return_mode == InverseReturnMode::AlwaysView) {
// NB: assumes mutated_view is a narrowed view of base.
@ -33,7 +33,7 @@ inline void infer_size_impl(
} else if (shape[dim] >= 0) {
newsize *= shape[dim];
} else {
AT_ERROR("invalid shape dimension ", shape[dim]);
TORCH_CHECK(false, "invalid shape dimension ", shape[dim]);
}
}
@ -45,15 +45,15 @@ struct TORCH_API OpaqueTensorImpl : public TensorImpl {
}

void set_size(int64_t dim, int64_t new_size) override {
AT_ERROR("opaque tensors do not have set_size");
TORCH_CHECK(false, "opaque tensors do not have set_size");
}

void set_stride(int64_t dim, int64_t new_stride) override {
AT_ERROR("opaque tensors do not have set_stride");
TORCH_CHECK(false, "opaque tensors do not have set_stride");
}

void set_storage_offset(int64_t storage_offset) override {
AT_ERROR("opaque tensors do not have set_storage_offset");
TORCH_CHECK(false, "opaque tensors do not have set_storage_offset");
}

#ifdef DEBUG
@ -23,7 +23,8 @@
case kSparseBsc: \
return __VA_ARGS__(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse compressed tensor layout but got ", \
the_layout); \

@ -42,7 +43,8 @@
case kSparseBsc: \
return (COLUMN_DIM_ACTION)(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse compressed tensor layout but got ", \
the_layout); \

@ -61,7 +63,8 @@
case kSparseBsc: \
return (BLOCK_ACTION)(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse compressed tensor layout but got ", \
the_layout); \

@ -77,7 +80,8 @@
case kSparseBsr: \
return (ROW_DIM_ACTION)(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse row compressed tensor layout but got ", \
the_layout); \

@ -93,7 +97,8 @@
case kSparseBsc: \
return (COL_DIM_ACTION)(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse column compressed tensor layout but got ", \
the_layout); \

@ -108,7 +113,8 @@
case kSparseCsc: \
return (ACTION)(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse compressed (non-block) tensor layout but got ", \
the_layout); \

@ -123,7 +129,8 @@
case kSparseBsc: \
return (ACTION)(); \
default: \
AT_ERROR( \
TORCH_CHECK( \
false, \
NAME, \
" expected sparse compressed block tensor layout but got ", \
the_layout); \
@ -57,13 +57,13 @@ void SparseTensorImpl::release_resources() {
}

void SparseTensorImpl::set_size(int64_t dim, int64_t new_size) {
AT_ERROR("sparse tensors do not have set_size");
TORCH_CHECK(false, "sparse tensors do not have set_size");
}
void SparseTensorImpl::set_stride(int64_t dim, int64_t new_stride) {
AT_ERROR("sparse tensors do not have set_stride");
TORCH_CHECK(false, "sparse tensors do not have set_stride");
}
void SparseTensorImpl::set_storage_offset(int64_t storage_offset) {
AT_ERROR("sparse tensors do not have set_storage_offset");
TORCH_CHECK(false, "sparse tensors do not have set_storage_offset");
}
#ifdef DEBUG
bool SparseTensorImpl::has_storage() const {
@ -155,7 +155,7 @@ void checkSameGPU(CheckedFrom c, const TensorArg& t1, const TensorArg& t2) {
}
oss << "but expected " << ((!t1->is_cpu() && !t2->is_cpu()) ? "them" : "it")
<< " to be on GPU (while checking arguments for " << c << ")";
AT_ERROR(oss.str());
TORCH_CHECK(false, oss.str());
}
TORCH_CHECK(
t1->get_device() == t2->get_device(),

@ -200,7 +200,7 @@ void checkScalarTypes(CheckedFrom c, const TensorArg& t,
}
oss << "; but got " << t->toString()
<< " instead (while checking arguments for " << c << ")";
AT_ERROR(oss.str());
TORCH_CHECK(false, oss.str());
}
}
@ -36,7 +36,8 @@ inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
for (const auto i : c10::irange(tensors.size())) {
const auto& expr = tensors[i];
if (expr.layout() != Layout::Strided) {
AT_ERROR(
TORCH_CHECK(
false,
"Expected dense tensor but got ",
expr.layout(),
" for sequence element ",

@ -48,7 +49,8 @@ inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
"'");
}
if (expr.device().type() != device_type) {
AT_ERROR(
TORCH_CHECK(
false,
"Expected object of device type ",
device_type,
" but got device type ",

@ -62,7 +64,8 @@ inline std::vector<TensorImpl*> checked_dense_tensor_list_unwrap(
"'");
}
if (expr.scalar_type() != scalar_type) {
AT_ERROR(
TORCH_CHECK(
false,
"Expected object of scalar type ",
scalar_type,
" but got scalar type ",

@ -96,7 +99,8 @@ std::array<int64_t, N> check_intlist(
return res;
}
if (list.size() != N) {
AT_ERROR(
TORCH_CHECK(
false,
"Expected a list of ",
N,
" ints but got ",
@ -149,7 +149,7 @@ Banned functions
*******************************/

static Tensor binary_cross_entropy_banned(const Tensor &, const Tensor &, const std::optional<Tensor>&, int64_t) {
AT_ERROR("torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are unsafe to autocast.\n"
TORCH_CHECK(false, "torch.nn.functional.binary_cross_entropy and torch.nn.BCELoss are unsafe to autocast.\n"
"Many models use a sigmoid layer right before the binary cross entropy layer.\n"
"In this case, combine the two layers using torch.nn.functional.binary_cross_entropy_with_logits\n"
"or torch.nn.BCEWithLogitsLoss. binary_cross_entropy_with_logits and BCEWithLogits are\n"
@ -23,36 +23,37 @@ TORCH_API bool is_autocast_cache_enabled();
TORCH_API void set_autocast_cache_enabled(bool enabled);

// deprecated CUDA-specific autocast APIs
C10_DEPRECATED_MESSAGE(
"at::autocast::is_enabled() is deprecated. Please use at::autocast::is_autocast_enabled(at::kCUDA) instead.")
TORCH_API inline bool is_enabled() {
[[deprecated(
"at::autocast::is_enabled() is deprecated. Please use at::autocast::is_autocast_enabled(at::kCUDA) instead.")]] TORCH_API inline bool
is_enabled() {
TORCH_WARN_DEPRECATION(
"at::autocast::",
__func__,
"() is deprecated. Please use at::autocast::is_autocast_enabled(at::kCUDA) instead.")
return is_autocast_enabled(at::kCUDA);
}
C10_DEPRECATED_MESSAGE(
"at::autocast::set_enabled(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(at::kCUDA, enabled) instead.")
TORCH_API inline void set_enabled(bool enabled) {
[[deprecated(
"at::autocast::set_enabled(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(at::kCUDA, enabled) instead.")]] TORCH_API inline void
set_enabled(bool enabled) {
TORCH_WARN_DEPRECATION(
"at::autocast::",
__func__,
"(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(at::kCUDA, enabled) instead.")
set_autocast_enabled(at::kCUDA, enabled);
}
C10_DEPRECATED_MESSAGE(
"at::autocast::get_autocast_gpu_dtype() is deprecated. Please use at::autocast::get_autocast_dtype(at::kCUDA) instead.")
TORCH_API inline at::ScalarType get_autocast_gpu_dtype() {
[[deprecated(
"at::autocast::get_autocast_gpu_dtype() is deprecated. Please use at::autocast::get_autocast_dtype(at::kCUDA) instead.")]] TORCH_API inline at::
ScalarType
get_autocast_gpu_dtype() {
TORCH_WARN_DEPRECATION(
"at::autocast::",
__func__,
"() is deprecated. Please use at::autocast::get_autocast_dtype(at::kCUDA) instead.")
return get_autocast_dtype(at::kCUDA);
}
C10_DEPRECATED_MESSAGE(
"at::autocast::set_autocast_gpu_dtype(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(at::kCUDA, dtype) instead.")
TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) {
[[deprecated(
"at::autocast::set_autocast_gpu_dtype(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(at::kCUDA, dtype) instead.")]] TORCH_API inline void
set_autocast_gpu_dtype(at::ScalarType dtype) {
TORCH_WARN_DEPRECATION(
"at::autocast::",
__func__,
@ -61,11 +62,10 @@ TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) {
}

#define DECLARE_DEPRECATED_AUTOCAST_APIS(name, device_type) \
C10_DEPRECATED_MESSAGE( \
[[deprecated( \
"at::autocast::is_" #name \
"_enabled() is deprecated. Please use at::autocast::is_autocast_enabled(" #device_type \
") instead.") \
TORCH_API inline bool is_##name##_enabled() { \
") instead.")]] TORCH_API inline bool is_##name##_enabled() { \
TORCH_WARN_DEPRECATION( \
"at::autocast::", \
__func__, \

@ -74,11 +74,11 @@ TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) {
return is_autocast_enabled(device_type); \
} \
\
C10_DEPRECATED_MESSAGE( \
[[deprecated( \
"at::autocast::set_" #name \
"_enabled(enabled) is deprecated. Please use at::autocast::set_autocast_enabled(" #device_type \
", enabled) instead.") \
TORCH_API inline void set_##name##_enabled(bool enabled) { \
", enabled) instead.")]] TORCH_API inline void \
set_##name##_enabled(bool enabled) { \
TORCH_WARN_DEPRECATION( \
"at::autocast::", \
__func__, \

@ -87,11 +87,11 @@ TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) {
set_autocast_enabled(device_type, enabled); \
} \
\
C10_DEPRECATED_MESSAGE( \
[[deprecated( \
"at::autocast::get_autocast_" #name \
"_dtype() is deprecated. Please use at::autocast::get_autocast_dtype(" #device_type \
") instead.") \
TORCH_API inline at::ScalarType get_autocast_##name##_dtype() { \
") instead.")]] TORCH_API inline at::ScalarType \
get_autocast_##name##_dtype() { \
TORCH_WARN_DEPRECATION( \
"at::autocast::", \
__func__, \

@ -100,11 +100,11 @@ TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) {
return get_autocast_dtype(device_type); \
} \
\
C10_DEPRECATED_MESSAGE( \
[[deprecated( \
"at::autocast::set_autocast_" #name \
"_dtype(dtype) is deprecated. Please use at::autocast::set_autocast_dtype(" #device_type \
", dtype) instead.") \
TORCH_API inline void set_autocast_##name##_dtype(at::ScalarType dtype) { \
", dtype) instead.")]] TORCH_API inline void \
set_autocast_##name##_dtype(at::ScalarType dtype) { \
TORCH_WARN_DEPRECATION( \
"at::autocast::", \
__func__, \
@ -211,7 +211,7 @@ inline at::ScalarType prioritize(
|
||||
const Tensor& nextArg,
|
||||
c10::DeviceType device_type = c10::DeviceType::CUDA) {
|
||||
if (current == at::kDouble) {
|
||||
AT_ERROR("promote type is double in at::autocast::prioritize");
|
||||
TORCH_CHECK(false, "promote type is double in at::autocast::prioritize");
|
||||
return current;
|
||||
}
|
||||
at::ScalarType lower_precision_fp =
|
||||
@ -225,7 +225,8 @@ inline at::ScalarType prioritize(
|
||||
} else if (current == lower_precision_fp && next == lower_precision_fp) {
|
||||
return lower_precision_fp;
|
||||
} else {
|
||||
AT_ERROR("Unexpected floating ScalarType in at::autocast::prioritize");
|
||||
TORCH_CHECK(
|
||||
false, "Unexpected floating ScalarType in at::autocast::prioritize");
|
||||
return current;
|
||||
}
|
||||
} else {
|
||||
|
||||
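A minimal standalone sketch of the macro-to-attribute migration applied in the autocast hunks above, using hypothetical old_api/new_api names rather than anything from this diff: the compiler-specific C10_DEPRECATED_MESSAGE macro gives way to the standard C++14 [[deprecated]] attribute, which any conforming compiler understands without extra macro plumbing.

    // Hypothetical example, not PyTorch code: [[deprecated]] replaces the
    // C10_DEPRECATED_MESSAGE macro used before this change.
    #include <iostream>

    [[deprecated("use new_api() instead")]] inline int old_api() {
      return 1;
    }

    inline int new_api() {
      return 2;
    }

    int main() {
      // Compiles, but emits a deprecation warning at this call site.
      std::cout << old_api() + new_api() << '\n';
      return 0;
    }

The attribute form also composes with macro-generated declarations, which is why the DECLARE_DEPRECATED_AUTOCAST_APIS block above can splice ")]]" directly into the line that carries the return type.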
@ -95,11 +95,9 @@ struct uniform_int_distribution {
template <typename T>
struct uniform_real_distribution {

C10_HOST_DEVICE inline uniform_real_distribution(T from, T to) {
C10_HOST_DEVICE inline uniform_real_distribution(T from, T to) : from_(from), to_(to) {
TORCH_CHECK_IF_NOT_ON_CUDA(from <= to);
TORCH_CHECK_IF_NOT_ON_CUDA(to - from <= std::numeric_limits<T>::max());
from_ = from;
to_ = to;
}

template <typename RNG>
@ -186,10 +184,8 @@ DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(float);
template <typename T>
struct normal_distribution {

C10_HOST_DEVICE inline normal_distribution(T mean_in, T stdv_in) {
C10_HOST_DEVICE inline normal_distribution(T mean_in, T stdv_in) : mean(mean_in), stdv(stdv_in) {
TORCH_CHECK_IF_NOT_ON_CUDA(stdv_in >= 0, "stdv_in must be positive: ", stdv_in);
mean = mean_in;
stdv = stdv_in;
}

template <typename RNG>
@ -236,9 +232,8 @@ template <> struct DiscreteDistributionType<double> { using type = double; };
template <typename T>
struct bernoulli_distribution {

C10_HOST_DEVICE inline bernoulli_distribution(T p_in) {
C10_HOST_DEVICE inline bernoulli_distribution(T p_in) : p(p_in) {
TORCH_CHECK_IF_NOT_ON_CUDA(p_in >= 0 && p_in <= 1);
p = p_in;
}

template <typename RNG>
@ -257,9 +252,8 @@ struct bernoulli_distribution {
template <typename T>
struct geometric_distribution {

C10_HOST_DEVICE inline geometric_distribution(T p_in) {
C10_HOST_DEVICE inline geometric_distribution(T p_in) : p(p_in) {
TORCH_CHECK_IF_NOT_ON_CUDA(p_in > 0 && p_in < 1);
p = p_in;
}

template <typename RNG>
@ -317,10 +311,8 @@ struct cauchy_distribution {
template <typename T>
struct lognormal_distribution {

C10_HOST_DEVICE inline lognormal_distribution(T mean_in, T stdv_in) {
C10_HOST_DEVICE inline lognormal_distribution(T mean_in, T stdv_in) : mean(mean_in), stdv(stdv_in) {
TORCH_CHECK_IF_NOT_ON_CUDA(stdv_in > 0);
mean = mean_in;
stdv = stdv_in;
}

template<typename RNG>

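The distribution constructors above are rewritten to initialize members in the member-initializer list instead of assigning them in the body. A standalone sketch of the difference, with made-up struct names:

    // Made-up example; `before` and `after` are not types from this diff.
    #include <stdexcept>

    struct before {
      double mean, stdv;
      before(double mean_in, double stdv_in) {
        // members were default-initialized first, then assigned
        if (stdv_in < 0) throw std::invalid_argument("stdv must be >= 0");
        mean = mean_in;
        stdv = stdv_in;
      }
    };

    struct after {
      double mean, stdv;
      after(double mean_in, double stdv_in) : mean(mean_in), stdv(stdv_in) {
        // members are already initialized; the body only validates
        if (stdv_in < 0) throw std::invalid_argument("stdv must be >= 0");
      }
    };

Besides skipping the default-construct-then-assign round trip, the initializer-list form is the only option once a member is const or a reference, so it is the more future-proof idiom for these header-only structs.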
@ -263,9 +263,8 @@ public:
// Can't put this directly into the macro function args because of commas
#define AT_X GenericPackedTensorAccessor<T, N, PtrTraits, index_t>

// Old name for `GenericPackedTensorAccessor`
template <typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
C10_DEFINE_DEPRECATED_USING(PackedTensorAccessor, AT_X)
using PackedTensorAccessor [[deprecated("Old name for `GenericPackedTensorAccessor`")]] = AT_X;

#undef AT_X


@ -28,7 +28,7 @@ struct TORCH_API EnumType : public NamedType {
std::move(enum_names_values),
std::move(cu)));
default:
AT_ERROR(
TORCH_CHECK(false,
"Cannot create Enum with value type '",
value->str(),
"', only int, float and string are supported");

@ -55,7 +55,7 @@ inline void FunctionSchema::checkAndNormalizeInputs(
inputs.push_back(*argument.default_value());
continue;
}
AT_ERROR(
TORCH_CHECK(false,
name(),
"() is missing value for argument '",
argument.name(),

@ -756,7 +756,7 @@ IValueComparator getLessThanComparator(const IValue& v) {
torch::jit::Function* lt_func =
checkObjectSortSchema(v.type()->expect<ClassType>(), why_not);
if (!lt_func) {
AT_ERROR(why_not.str());
TORCH_CHECK(false, why_not.str());
}

return [lt_func](const IValue& a, const IValue& b) {
@ -772,7 +772,7 @@ IValueComparator getLessThanComparator(const IValue& v) {
};
}

AT_ERROR("IValues of type: ", v.tagKind(), " are not comparable");
TORCH_CHECK(false, "IValues of type: ", v.tagKind(), " are not comparable");
}

IValueComparator getGreaterThanComparator(const IValue& v) {
@ -967,7 +967,7 @@ IValue IValue::deepcopy(
copy = *this;
} break;
default: {
AT_ERROR("Can't deepcopy IValue with tag: ", tagKind());
TORCH_CHECK(false, "Can't deepcopy IValue with tag: ", tagKind());
}
}
// NB: this doesn't work if an object contains itself, and it may
@ -1050,7 +1050,7 @@ c10::intrusive_ptr<ivalue::Object> ivalue::Object::deepcopy(
}
err << ". Please define serialization methods via def_pickle() for "
"this class.";
AT_ERROR(err.str());
TORCH_CHECK(false, err.str());
}
object->setSlot(i, slots_[i].deepcopy(memo, device));
}

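The AT_ERROR(...) to TORCH_CHECK(false, ...) substitution repeated through these files folds unconditional errors into the same macro used for conditional checks. A simplified stand-in, not PyTorch's actual implementation, showing how such a macro composes its message and throws:

    // Simplified stand-in for the TORCH_CHECK idiom; MY_CHECK and check_fail
    // are hypothetical names, and the real macro throws c10::Error rather
    // than std::runtime_error.
    #include <sstream>
    #include <stdexcept>

    template <typename... Args>
    [[noreturn]] void check_fail(const Args&... args) {
      std::ostringstream ss;
      (ss << ... << args);  // concatenate all message pieces (C++17 fold)
      throw std::runtime_error(ss.str());
    }

    #define MY_CHECK(cond, ...)     \
      do {                          \
        if (!(cond)) {              \
          check_fail(__VA_ARGS__);  \
        }                           \
      } while (0)

    int demo(int tag) {
      MY_CHECK(tag >= 0, "Can't handle tag: ", tag);  // conditional form
      MY_CHECK(false, "unreachable tag: ", tag);      // unconditional, as in this diff
      return tag;  // never reached
    }

With one macro for both cases there is a single code path for message formatting and error construction, which is presumably why the redundant AT_ERROR spelling is being retired.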
@ -809,12 +809,9 @@ struct TORCH_API IValue final {
IValue(c10::Dict<Key, Value> v);

template <class Key, class Value>
/// \cond
/// DOXYGEN_CANNOT_HANDLE_CONSTRUCTORS_WITH_MACROS_SO_EXCLUDE_THIS_LINE_FROM_DOXYGEN
C10_DEPRECATED_MESSAGE(
"IValues based on std::unordered_map<K, V> are slow and deprecated. Please use c10::Dict<K, V> instead.")
/// \endcond
IValue(std::unordered_map<Key, Value> v);
[[deprecated(
"IValues based on std::unordered_map<K, V> are slow and deprecated. Please use c10::Dict<K, V> instead.")]]
IValue(std::unordered_map<Key, Value> v);

template <class T, enable_if_ivalue_constructible<T> = nullptr>
IValue(std::optional<T> v);
@ -1163,7 +1160,7 @@ struct TORCH_API IValue final {
// this value different (e.g. using NaN boxing), and this would make it more
// costly to determine the tag for all types vs just determining if something
// is a particular type. Instead we want clients to use the `isX` methods when
// possible. If for perf. reasons you really, absolutely, must have a jump
// possible. If for performance reasons you really, absolutely, must have a jump
// table, then we can revisit this.
enum class Tag : uint32_t {
#define DEFINE_TAG(x) x,

@ -863,6 +863,19 @@ struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target {
Future& operator=(const Future&) = delete;
Future& operator=(Future&&) = delete;

// Destructor
// Explicitly destroy events under device guard, otherwise it can lead to
// extra context being created on device 0. Reason: python garbage collector
// calls this destructor, but python GC does not have a device context, so a
// "default" one (usually on device 0) could be created when we go down the
// line of event destroy.
~Future() override {
while (!events_.empty()) {
c10::OptionalDeviceGuard deviceGuard(events_.back().device());
events_.pop_back();
}
}

struct TORCH_API FutureError final : public std::exception {
explicit FutureError(std::string&& error_msg_)
: error_msg(std::move(error_msg_)) {}
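The new Future destructor above makes each event's own device current while that event is destroyed. The same RAII idea in a self-contained sketch, with hypothetical DeviceGuard and Event types standing in for c10::OptionalDeviceGuard and the real event class:

    // Hypothetical toy types; only the guard-before-destroy pattern mirrors
    // the diff above.
    #include <vector>

    static int g_current_device = 0;  // stand-in for the driver's current device

    struct DeviceGuard {
      int prev_;
      explicit DeviceGuard(int d) : prev_(g_current_device) { g_current_device = d; }
      ~DeviceGuard() { g_current_device = prev_; }
    };

    struct Event {
      int device = 0;
      ~Event() { /* must run while `device` is current */ }
    };

    struct Holder {
      std::vector<Event> events_;
      ~Holder() {
        while (!events_.empty()) {
          DeviceGuard guard(events_.back().device);  // pin the event's device
          events_.pop_back();  // Event destructor runs under the guard
        }
      }
    };

Destroying each event under a guard scoped to that event means a thread with no device context (such as a garbage-collector thread) never lets the driver lazily create a context on device 0.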
@ -1758,8 +1771,8 @@ struct _fake_type {};
template <class Elem>
// TODO this is deprecated but we don't throw a warning because a lot of ops in
// native_functions.yaml still return std::vector.
// C10_DEPRECATED_MESSAGE("IValues based on std::vector<T> are potentially slow
// and deprecated. Please use torch::List<T> instead.")
// [[deprecated("IValues based on std::vector<T> are potentially slow
// and deprecated. Please use torch::List<T> instead.")]]
std::vector<Elem> generic_to(IValue ivalue, _fake_type<std::vector<Elem>>) {
// We need to do a deep copy of the vector because there might be other
// references to this same IValue that also use the list. We can't just
@ -1895,8 +1908,8 @@ c10::Dict<Key, Value> generic_to(
}

template <typename K, typename V>
C10_DEPRECATED_MESSAGE(
"IValues based on std::unordered_map are slow and deprecated. Please use c10::Dict<K, V> instead.")
[[deprecated(
"IValues based on std::unordered_map are slow and deprecated. Please use c10::Dict<K, V> instead.")]]
std::unordered_map<K, V> generic_to(
IValue ivalue,
_fake_type<std::unordered_map<K, V>>) {

@ -938,7 +938,7 @@ struct TORCH_API DictType : public SharedType {
case TypeKind::DeviceObjType:
return DictTypePtr(new DictType(std::move(key), std::move(value)));
default:
AT_ERROR(
TORCH_CHECK(false,
"Cannot create dict for key type '",
key->str(),
"', only int, float, complex, Tensor, device and string keys are supported");

@ -585,7 +585,7 @@ struct TORCH_API Type {
virtual TypePtr createWithContained(
// NOLINTNEXTLINE(performance-unnecessary-value-param)
std::vector<TypePtr> /*contained_types*/) const {
AT_ERROR(
TORCH_CHECK(false,
"type with contained types did not overload createWithContained: ",
str());
}

@ -562,7 +562,7 @@ public:
}

template<class Lambda>
C10_DEPRECATED_MESSAGE("Registering operator kernels with stateful lambdas (i.e. lambdas with a capture) has non-obvious behavior. This is deprecated. Please use a lambda without a capture or a functor class instead.")
[[deprecated("Registering operator kernels with stateful lambdas (i.e. lambdas with a capture) has non-obvious behavior. This is deprecated. Please use a lambda without a capture or a functor class instead.")]]
// enable_if: only enable it if Lambda is actually a functor but not a stateless lambda
std::enable_if_t<guts::is_functor<Lambda>::value && !guts::is_stateless_lambda<std::decay_t<Lambda>>::value, RegisterOperators&&>
op(const std::string& schemaOrName, Lambda&& lambda, Options&& options = RegisterOperators::options()) && {

@ -21,7 +21,7 @@ class Operation {
public:
template <typename F,
std::enable_if_t<accepts<F, Stack*>::value, int> = 0>
C10_DEPRECATED_MESSAGE("Please use void(Stack&) to register operator instead.")
[[deprecated("Please use void(Stack&) to register operator instead.")]]
// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward)
Operation(F&& raw): op_([raw = std::forward<F>(raw)](Stack& stack) {
raw(&stack);

@ -629,7 +629,7 @@ MatchTypeReturn matchTypeVariables(
}
}

AT_ERROR("Unhandled free variable container: ", formal->repr_str());
TORCH_CHECK(false, "Unhandled free variable container: ", formal->repr_str());
}

// change return types like List[List[t]] into List[List[int]]

@ -34,7 +34,7 @@ static rocblas_operation hipOperationToRocOperation(hipblasOperation_t op)
case HIPBLAS_OP_C:
return rocblas_operation_conjugate_transpose;
}
AT_ERROR("HIPBLAS_STATUS_INVALID_ENUM");
TORCH_CHECK(false, "HIPBLAS_STATUS_INVALID_ENUM");
}
static hipblasStatus_t rocBLASStatusToHIPStatus(rocblas_status error)
{
@ -57,7 +57,7 @@ static hipblasStatus_t rocBLASStatusToHIPStatus(rocblas_status error)
case rocblas_status_internal_error:
return HIPBLAS_STATUS_INTERNAL_ERROR;
}
AT_ERROR("HIPBLAS_STATUS_INVALID_ENUM");
TORCH_CHECK(false, "HIPBLAS_STATUS_INVALID_ENUM");
}
// hipblas does not have hipblasSetMathMode
#define hipblasSetMathMode(handle, flags) HIPBLAS_STATUS_SUCCESS
@ -116,7 +116,7 @@ static cublasOperation_t _cublasOpFromChar(char op) {
case 'C':
return CUBLAS_OP_C;
}
AT_ERROR(
TORCH_CHECK(false,
"_cublasOpFromChar input should be 't', 'n' or 'c' but got `", op, "`");
}


@ -165,9 +165,9 @@ constexpr const char* _cusolver_backend_suggestion = \
[[maybe_unused]] CUresult get_error_str_err = \
at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \
if (get_error_str_err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: unknown error"); \
TORCH_CHECK(false, "CUDA driver error: unknown error"); \
} else { \
AT_ERROR("CUDA driver error: ", err_str); \
TORCH_CHECK(false, "CUDA driver error: ", err_str); \
} \
} \
} while (0)
@ -178,7 +178,7 @@ constexpr const char* _cusolver_backend_suggestion = \
do { \
CUresult __err = EXPR; \
if (__err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: ", static_cast<int>(__err)); \
TORCH_CHECK(false, "CUDA driver error: ", static_cast<int>(__err)); \
} \
} while (0)

@ -198,9 +198,9 @@ constexpr const char* _cusolver_backend_suggestion = \
nvrtcResult __err = EXPR; \
if (__err != NVRTC_SUCCESS) { \
if (static_cast<int>(__err) != 7) { \
AT_ERROR("CUDA NVRTC error: ", at::globalContext().getNVRTC().nvrtcGetErrorString(__err)); \
TORCH_CHECK(false, "CUDA NVRTC error: ", at::globalContext().getNVRTC().nvrtcGetErrorString(__err)); \
} else { \
AT_ERROR("CUDA NVRTC error: NVRTC_ERROR_BUILTIN_OPERATION_FAILURE"); \
TORCH_CHECK(false, "CUDA NVRTC error: NVRTC_ERROR_BUILTIN_OPERATION_FAILURE"); \
} \
} \
} while (0)

@ -103,7 +103,7 @@ void CUDAHooks::init() const {
#endif
}

const Generator& CUDAHooks::getDefaultCUDAGenerator(DeviceIndex device_index) const {
const Generator& CUDAHooks::getDefaultGenerator(DeviceIndex device_index) const {
return at::cuda::detail::getDefaultCUDAGenerator(device_index);
}

@ -300,7 +300,7 @@ long CUDAHooks::versionCuDNN() const {
#if AT_CUDNN_ENABLED()
return CUDNN_VERSION;
#else
AT_ERROR("Cannot query CuDNN version if ATen_cuda is not built with CuDNN");
TORCH_CHECK(false, "Cannot query CuDNN version if ATen_cuda is not built with CuDNN");
#endif
}

@ -408,7 +408,7 @@ double CUDAHooks::batchnormMinEpsilonCuDNN() const {
#if AT_CUDNN_ENABLED()
return CUDNN_BN_MIN_EPSILON;
#else
AT_ERROR(
TORCH_CHECK(false,
"Cannot query CUDNN_BN_MIN_EPSILON if ATen_cuda is not built with CuDNN");
#endif
}

@ -22,7 +22,8 @@ struct CUDAHooks : public at::CUDAHooksInterface {
void init() const override;
Device getDeviceFromPtr(void* data) const override;
bool isPinnedPtr(const void* data) const override;
const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1) const override;
const Generator& getDefaultGenerator(
DeviceIndex device_index = -1) const override;
bool hasCUDA() const override;
bool hasMAGMA() const override;
bool hasCuDNN() const override;

@ -310,7 +310,7 @@ static hipblasOperation_t _hipblasOpFromChar(char op) {
case 'C':
return HIPBLAS_OP_C;
}
AT_ERROR(
TORCH_CHECK(false,
"_hipblasOpFromChar input should be 't', 'n' or 'c' but got `", op, "`");
}

@ -323,7 +323,7 @@ static char _charFromhipblasOp(hipblasOperation_t op) {
case HIPBLAS_OP_C:
return 'C';
}
AT_ERROR(
TORCH_CHECK(false,
"_charFromhipblasOp input should be HIPBLAS_OP_N/T/C but got `", op, "`");
}


@ -130,7 +130,7 @@ static rocblas_operation _rocblasOpFromChar(char op) {
case 'C':
return rocblas_operation_conjugate_transpose;
}
AT_ERROR(
TORCH_CHECK(false,
"_rocblasOpFromChar input should be 't', 'n' or 'c' but got `", op, "`");
}


@ -197,15 +197,15 @@ class GemmTunableOp : public TunableOp<GemmParams<T>, StreamTimer> {
|
||||
this->RegisterOp(std::string("Default"), std::make_unique<DefaultGemmOp<T>>());
|
||||
|
||||
#ifdef USE_ROCM
|
||||
static const char *env_rocblas = std::getenv("PYTORCH_TUNABLEOP_ROCBLAS_ENABLED");
|
||||
if (env_rocblas == nullptr || strcmp(env_rocblas, "1") == 0) {
|
||||
static const auto env_rocblas = c10::utils::check_env("PYTORCH_TUNABLEOP_ROCBLAS_ENABLED");
|
||||
if (!env_rocblas.has_value() || env_rocblas.value()) {
|
||||
for (auto&& [name, op] : GetRocBlasGemmTypeStringAndOps<T>()) {
|
||||
this->RegisterOp(std::move(name), std::move(op));
|
||||
}
|
||||
}
|
||||
|
||||
static const char *env_hipblaslt = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (env_hipblaslt == nullptr || strcmp(env_hipblaslt, "1") == 0) {
|
||||
static const auto env_hipblaslt = c10::utils::check_env("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (!env_hipblaslt.has_value() || env_hipblaslt.value()) {
|
||||
// disallow tuning of hipblaslt with c10::complex
|
||||
if constexpr (
|
||||
!std::is_same_v<T, c10::complex<float>> &&
|
||||
@ -230,8 +230,8 @@ class GemmAndBiasTunableOp : public TunableOp<GemmAndBiasParams<T>, StreamTimer>
|
||||
this->RegisterOp(std::string("Default"), std::make_unique<DefaultGemmAndBiasOp<T>>());
|
||||
|
||||
#ifdef USE_ROCM
|
||||
static const char *env_hipblaslt = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (env_hipblaslt == nullptr || strcmp(env_hipblaslt, "1") == 0) {
|
||||
static const auto env_hipblaslt = c10::utils::check_env("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (!env_hipblaslt.has_value() || env_hipblaslt.value()) {
|
||||
// disallow tuning of hipblaslt with c10::complex
|
||||
if constexpr (
|
||||
!std::is_same_v<T, c10::complex<float>> &&
|
||||
@ -256,15 +256,15 @@ class GemmStridedBatchedTunableOp : public TunableOp<GemmStridedBatchedParams<T>
|
||||
this->RegisterOp(std::string("Default"), std::make_unique<DefaultGemmStridedBatchedOp<T>>());
|
||||
|
||||
#ifdef USE_ROCM
|
||||
static const char *env_rocblas = std::getenv("PYTORCH_TUNABLEOP_ROCBLAS_ENABLED");
|
||||
if (env_rocblas == nullptr || strcmp(env_rocblas, "1") == 0) {
|
||||
static const auto env_rocblas = c10::utils::check_env("PYTORCH_TUNABLEOP_ROCBLAS_ENABLED");
|
||||
if (!env_rocblas.has_value() || env_rocblas.value()) {
|
||||
for (auto&& [name, op] : GetRocBlasGemmStridedBatchedTypeStringAndOps<T>()) {
|
||||
this->RegisterOp(std::move(name), std::move(op));
|
||||
}
|
||||
}
|
||||
|
||||
static const char *env_hipblaslt = std::getenv("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (env_hipblaslt == nullptr || strcmp(env_hipblaslt, "1") == 0) {
|
||||
static const auto env_hipblaslt = c10::utils::check_env("PYTORCH_TUNABLEOP_HIPBLASLT_ENABLED");
|
||||
if (!env_hipblaslt.has_value() || env_hipblaslt.value()) {
|
||||
// disallow tuning of hipblaslt with c10::complex
|
||||
if constexpr (
|
||||
!std::is_same_v<T, c10::complex<float>> &&
|
||||
|
||||
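The TunableOp constructors above swap raw std::getenv plus strcmp for c10::utils::check_env. A standalone sketch of the semantics this assumes: the helper returns an empty optional when the variable is unset and otherwise reports whether it is set to a truthy value.

    // Standalone re-implementation sketch; check_env here is a local
    // function, not the c10 one, and "truthy" is approximated as the
    // string "1".
    #include <cstdlib>
    #include <cstring>
    #include <optional>

    std::optional<bool> check_env(const char* name) {
      const char* value = std::getenv(name);
      if (value == nullptr) {
        return std::nullopt;  // variable not set at all
      }
      return std::strcmp(value, "1") == 0;  // set: report truthiness
    }

    bool rocblas_enabled() {
      // "enabled unless explicitly disabled", matching the call sites above
      const auto env = check_env("PYTORCH_TUNABLEOP_ROCBLAS_ENABLED");
      return !env.has_value() || env.value();
    }

Collapsing the null check and the string comparison into one optional keeps the three nearly identical call sites from drifting apart.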
@ -113,7 +113,7 @@ _cudnn_rnn_cast_reflatten(const Tensor & input,
|
||||
batch_sizes,
|
||||
dropout_state);
|
||||
#else // AT_CUDNN_ENABLED()
|
||||
AT_ERROR("autocast::_cudnn_rnn_cast_reflatten: ATen not compiled with cuDNN support");
|
||||
TORCH_CHECK(false, "autocast::_cudnn_rnn_cast_reflatten: ATen not compiled with cuDNN support");
|
||||
return {Tensor{}, Tensor{}, Tensor{}, Tensor{}, Tensor{}}; // never reached, placates the compiler
|
||||
#endif // AT_CUDNN_ENABLED()
|
||||
}
|
||||
|
||||
@ -1,9 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <c10/core/Device.h>
|
||||
#include <c10/core/Stream.h>
|
||||
#include <c10/core/Allocator.h>
|
||||
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
|
||||
|
||||
namespace at {
|
||||
|
||||
// AcceleratorHooksInterface is a shared interface provided by all
|
||||
@ -58,7 +62,18 @@ struct TORCH_API AcceleratorHooksInterface {
|
||||
virtual Device getDeviceFromPtr(void* data) const {
|
||||
TORCH_CHECK(false, "Backend doesn't support getDeviceFromPtr()");
|
||||
}
|
||||
|
||||
virtual const Generator& getDefaultGenerator(
|
||||
C10_UNUSED DeviceIndex device_index = -1) const {
|
||||
TORCH_CHECK(false, "Backend doesn`t support getDefaultGenerator()");
|
||||
}
|
||||
|
||||
virtual Generator getNewGenerator(
|
||||
C10_UNUSED DeviceIndex device_index = -1) const {
|
||||
TORCH_CHECK(false, "Backend doesn`t support getNewGenerator()");
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace at
|
||||
|
||||
C10_DIAGNOSTIC_POP()
|
||||
|
||||
@ -6,16 +6,13 @@
|
||||
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
// Forward-declares at::Generator and at::cuda::NVRTC
|
||||
// NB: Class must live in `at` due to limitations of Registry.h.
|
||||
namespace at {
|
||||
struct Generator;
|
||||
|
||||
// Forward-declares at::cuda::NVRTC
|
||||
namespace cuda {
|
||||
struct NVRTC;
|
||||
} // namespace cuda
|
||||
} // namespace at
|
||||
|
||||
// NB: Class must live in `at` due to limitations of Registry.h.
|
||||
namespace at {
|
||||
|
||||
#ifdef _MSC_VER
|
||||
constexpr const char* CUDA_HELP =
|
||||
@ -69,8 +66,8 @@ struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface {
|
||||
TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP);
|
||||
}
|
||||
|
||||
virtual const Generator& getDefaultCUDAGenerator(
|
||||
[[maybe_unused]] DeviceIndex device_index = -1) const {
|
||||
const Generator& getDefaultGenerator(
|
||||
[[maybe_unused]] DeviceIndex device_index = -1) const override {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"Cannot get default CUDA generator without ATen_cuda library. ",
|
||||
|
||||
@ -1,19 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <c10/core/GeneratorImpl.h>
|
||||
#include <c10/util/Exception.h>
|
||||
|
||||
#include <c10/util/Registry.h>
|
||||
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace at {
|
||||
class Context;
|
||||
}
|
||||
|
||||
// NB: Class must live in `at` due to limitations of Registry.h.
|
||||
namespace at {
|
||||
|
||||
@ -30,8 +24,9 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
|
||||
TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
|
||||
}
|
||||
|
||||
virtual std::unique_ptr<c10::GeneratorImpl> initHIPGenerator(Context*) const {
|
||||
AT_ERROR("Cannot initialize HIP generator without ATen_hip library.");
|
||||
const Generator& getDefaultGenerator(
|
||||
C10_UNUSED DeviceIndex device_index = -1) const override {
|
||||
TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library.");
|
||||
}
|
||||
|
||||
virtual bool hasHIP() const {
|
||||
@ -47,11 +42,7 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
|
||||
}
|
||||
|
||||
Allocator* getPinnedMemoryAllocator() const override {
|
||||
AT_ERROR("Pinned memory requires HIP.");
|
||||
}
|
||||
|
||||
virtual void registerHIPTypes(Context*) const {
|
||||
AT_ERROR("Cannot registerHIPTypes() without ATen_hip library.");
|
||||
TORCH_CHECK(false, "Pinned memory requires HIP.");
|
||||
}
|
||||
|
||||
virtual int getNumGPUs() const {
|
||||
@ -59,7 +50,7 @@ struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface {
|
||||
}
|
||||
|
||||
bool hasPrimaryContext(DeviceIndex device_index) const override {
|
||||
AT_ERROR("Cannot check primary context without ATen_hip library.");
|
||||
TORCH_CHECK(false, "Cannot check primary context without ATen_hip library.");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
@ -9,7 +8,7 @@
|
||||
|
||||
namespace at {
|
||||
|
||||
struct TORCH_API IPUHooksInterface: AcceleratorHooksInterface {
|
||||
struct TORCH_API IPUHooksInterface : AcceleratorHooksInterface {
|
||||
~IPUHooksInterface() override = default;
|
||||
|
||||
void init() const override {
|
||||
@ -21,16 +20,14 @@ struct TORCH_API IPUHooksInterface: AcceleratorHooksInterface {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual const Generator& getDefaultIPUGenerator(
|
||||
DeviceIndex device_index [[maybe_unused]] = -1) const {
|
||||
AT_ERROR(
|
||||
"Cannot get the default IPU generator: the IPU backend is not "
|
||||
"available.");
|
||||
const Generator& getDefaultGenerator(
|
||||
C10_UNUSED DeviceIndex device_index = -1) const override {
|
||||
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
|
||||
}
|
||||
|
||||
virtual Generator newIPUGenerator(DeviceIndex device_index [[maybe_unused]] = -1) const {
|
||||
AT_ERROR(
|
||||
"Cannot create a new IPU generator: the IPU backend is not available.");
|
||||
Generator getNewGenerator(
|
||||
DeviceIndex device_index [[maybe_unused]] = -1) const override {
|
||||
TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library.");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -2,9 +2,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <ATen/core/Generator.h>
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/Registry.h>
|
||||
|
||||
@ -31,7 +31,8 @@ struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface {
|
||||
virtual bool isOnMacOSorNewer(unsigned major = 13, unsigned minor = 0) const {
|
||||
FAIL_MPSHOOKS_FUNC(__func__);
|
||||
}
|
||||
virtual const Generator& getDefaultMPSGenerator() const {
|
||||
const Generator& getDefaultGenerator(
|
||||
C10_UNUSED DeviceIndex device_index = -1) const override {
|
||||
FAIL_MPSHOOKS_FUNC(__func__);
|
||||
}
|
||||
virtual Allocator* getMPSDeviceAllocator() const {
|
||||
|
||||
@ -1,18 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
#include <c10/core/Allocator.h>
|
||||
#include <c10/core/Device.h>
|
||||
#include <c10/core/Storage.h>
|
||||
#include <c10/util/Exception.h>
|
||||
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
|
||||
|
||||
namespace at {
|
||||
|
||||
struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
|
||||
~PrivateUse1HooksInterface() override = default;
|
||||
virtual const at::Generator& getDefaultGenerator(
|
||||
c10::DeviceIndex device_index) const {
|
||||
|
||||
const at::Generator& getDefaultGenerator(
|
||||
c10::DeviceIndex device_index) const override {
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
||||
false,
|
||||
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDefaultGenerator`.");
|
||||
@ -24,17 +26,17 @@ struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface {
|
||||
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getDeviceFromPtr`.");
|
||||
}
|
||||
|
||||
virtual bool isPinnedPtr(const void* data) const override {
|
||||
bool isPinnedPtr(const void* data) const override {
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual Allocator* getPinnedMemoryAllocator() const override {
|
||||
Allocator* getPinnedMemoryAllocator() const override {
|
||||
TORCH_CHECK(
|
||||
false,
|
||||
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `getPinnedMemoryAllocator`.");
|
||||
}
|
||||
|
||||
virtual bool hasPrimaryContext(DeviceIndex device_index) const override {
|
||||
bool hasPrimaryContext(DeviceIndex device_index) const override {
|
||||
TORCH_CHECK_NOT_IMPLEMENTED(
|
||||
false,
|
||||
"You should register `PrivateUse1HooksInterface` for PrivateUse1 before call `hasPrimaryContext`.");
|
||||
|
||||
@ -4,7 +4,6 @@
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/Registry.h>
|
||||
|
||||
#include <ATen/core/Generator.h>
|
||||
#include <ATen/detail/AcceleratorHooksInterface.h>
|
||||
|
||||
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter")
|
||||
@ -32,17 +31,17 @@ struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{
|
||||
TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library.");
|
||||
}
|
||||
|
||||
virtual Generator getXPUGenerator(
|
||||
[[maybe_unused]] DeviceIndex device_index = -1) const {
|
||||
TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
|
||||
}
|
||||
|
||||
virtual const Generator& getDefaultXPUGenerator(
|
||||
[[maybe_unused]] DeviceIndex device_index = -1) const {
|
||||
const Generator& getDefaultGenerator(
|
||||
[[maybe_unused]] DeviceIndex device_index = -1) const override {
|
||||
TORCH_CHECK(
|
||||
false, "Cannot get default XPU generator without ATen_xpu library.");
|
||||
}
|
||||
|
||||
Generator getNewGenerator(
|
||||
[[maybe_unused]] DeviceIndex device_index = -1) const override {
|
||||
TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library.");
|
||||
}
|
||||
|
||||
virtual DeviceIndex getNumGPUs() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
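Across the hooks headers above, per-backend getters (getDefaultCUDAGenerator, getDefaultMPSGenerator, getDefaultXPUGenerator, and so on) collapse into one virtual getDefaultGenerator on the shared AcceleratorHooksInterface. A toy sketch of the shape of that refactor, with simplified names:

    // Toy model of the refactor; these are simplified stand-ins, not the
    // real ATen interfaces.
    #include <stdexcept>

    struct Generator {};

    struct AcceleratorHooks {
      virtual ~AcceleratorHooks() = default;
      // Single device-generic entry point; backends override it.
      virtual const Generator& getDefaultGenerator(int device_index = -1) const {
        throw std::runtime_error("backend doesn't support getDefaultGenerator()");
      }
    };

    struct CudaHooks : AcceleratorHooks {
      Generator default_gen_;
      // Was a CUDA-specific getter; now just an override.
      const Generator& getDefaultGenerator(int /*device_index*/ = -1) const override {
        return default_gen_;
      }
    };

    const Generator& default_generator(const AcceleratorHooks& hooks) {
      return hooks.getDefaultGenerator();  // works for any registered backend
    }

Device-generic callers can then be written once against the base interface instead of switching on the backend type.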
@ -32,7 +32,9 @@
#define DLPACK_DLL
#endif

// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <stdint.h>
// NOLINTNEXTLINE(modernize-deprecated-headers)
#include <stddef.h>

#ifdef __cplusplus

@ -224,7 +224,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
// but shape inference is not possible.
if (self.sym_numel() == 0) {
if (num_classes <= 0) {
AT_ERROR("Can not infer total number of classes from empty tensor.");
TORCH_CHECK(false, "Can not infer total number of classes from empty tensor.");
} else {
shape.emplace_back(num_classes);
return at::empty_symint(shape, self.options());

@ -103,7 +103,7 @@ template<
// optional cannot be used in a template, otherwise we would use it here.
int maybe_keepdim_arg_pos
>
void boxed_reduction_batch_rule(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
static void boxed_reduction_batch_rule(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
const auto& schema = op.schema();
const auto num_returns = schema.returns().size();
const auto num_arguments = schema.arguments().size();
@ -357,21 +357,21 @@ static std::tuple<Tensor, std::optional<int64_t>> searchsorted_batch_rule(
// B<...>D, B<...>V -> no change
if (buckets_bdim.has_value() && self_bdim.has_value()) {
auto self_ = moveBatchDimToFront(self, self_bdim);
auto result = at::searchsorted(buckets, self_, out_int32, right, std::move(side), sorter_);
auto result = at::searchsorted(buckets, self_, out_int32, right, side, sorter_);
return std::make_tuple(std::move(result), 0);
}
// B<...>D, <...>V -> B<...>D, B<...>V
if (buckets_bdim.has_value() && !self_bdim.has_value()) {
auto self_ = moveBatchDimToFront(self, self_bdim);
self_ = ensure_has_bdim(self_, self_bdim.has_value(), buckets.size(0));
auto result = at::searchsorted(buckets, self_, out_int32, right, std::move(side), sorter_);
auto result = at::searchsorted(buckets, self_, out_int32, right, side, sorter_);
return std::make_tuple(std::move(result), 0);
}
// <...>D, B<...>V -> <...>D, <...>(BV)
if (!buckets_bdim.has_value() && self_bdim.has_value()) {
auto bdim_size = self.size(*self_bdim);
auto self_ = reshape_dim_into(*self_bdim, -1, self);
auto result = at::searchsorted(buckets, self_, out_int32, right, std::move(side), sorter_);
auto result = at::searchsorted(buckets, self_, out_int32, right, side, sorter_);
result = reshape_dim_outof(-1, bdim_size, result);
return std::make_tuple(result, result.dim() - 2);
}
@ -382,7 +382,7 @@ static std::tuple<Tensor, std::optional<int64_t>> searchsorted_batch_rule(
if (buckets_bdim.has_value() && self_bdim.has_value()) {
auto self_ = moveBatchDimToFront(self, self_bdim);
auto self_view_ = self_logical_rank == 0 ? self_.unsqueeze(-1) : self_.flatten(1);
auto result = at::searchsorted(buckets, self_view_, out_int32, right, std::move(side), sorter_);
auto result = at::searchsorted(buckets, self_view_, out_int32, right, side, sorter_);
result = self_logical_rank == 0 ? result.squeeze(-1) : result.view(self_.sizes());
return std::make_tuple(std::move(result), 0);
}
@ -391,13 +391,13 @@ static std::tuple<Tensor, std::optional<int64_t>> searchsorted_batch_rule(
auto bdim_size = buckets.size(*buckets_bdim);
auto self_ = ensure_has_bdim(self, false, bdim_size);
auto self_view_ = self_logical_rank == 0 ? self_.unsqueeze(-1) : self_.flatten(1);
auto result = at::searchsorted(buckets, self_view_, out_int32, right, std::move(side), sorter_);
auto result = at::searchsorted(buckets, self_view_, out_int32, right, side, sorter_);
result = self_logical_rank == 0 ? result.squeeze(-1) : result.view(self_.sizes());
return std::make_tuple(std::move(result), 0);
}
// D, B* -> no change
if (!buckets_bdim.has_value() && self_bdim.has_value()) {
auto result = at::searchsorted(buckets, self, out_int32, right, std::move(side), sorter_);
auto result = at::searchsorted(buckets, self, out_int32, right, side, sorter_);
return std::make_tuple(std::move(result), self_bdim);
}
TORCH_INTERNAL_ASSERT(false);

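The searchsorted hunks above stop wrapping `side` in std::move at call sites that appear more than once in the function. A minimal sketch, unrelated to PyTorch types, of why moving from a variable that later code may still read is a hazard:

    // Minimal illustration; `side` and `consume` are made-up names.
    #include <iostream>
    #include <optional>
    #include <string>

    void consume(std::optional<std::string> side) {
      if (side) std::cout << *side << '\n';
    }

    int main() {
      std::optional<std::string> side = "left";
      consume(std::move(side));  // `side` is now moved-from
      consume(std::move(side));  // may observe an empty, unspecified value

      std::optional<std::string> side2 = "right";
      consume(side2);  // copy: safe to use again
      consume(side2);  // still prints "right"
      return 0;
    }

Passing `side` without the move at every branch, as the diff now does, keeps each call site independent of the others.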
@ -16,7 +16,7 @@ at::Tensor& metal_copy_(at::Tensor& self, const at::Tensor& src) {
|
||||
if (p) {
|
||||
return p->metal_copy_(self, src);
|
||||
}
|
||||
AT_ERROR("Metal backend was not linked to the build");
|
||||
TORCH_CHECK(false, "Metal backend was not linked to the build");
|
||||
}
|
||||
} // namespace at::metal
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@ miopen_rnn(const Tensor & input_r,
|
||||
fn_dropout_state_opt);
|
||||
|
||||
#else
|
||||
AT_ERROR("autocast::miopen_rnn: ATen not compiled with ROCm enabled");
|
||||
TORCH_CHECK(false, "autocast::miopen_rnn: ATen not compiled with ROCm enabled");
|
||||
return {Tensor{}, Tensor{}, Tensor{}, Tensor{}, Tensor{}}; // placate the compiler
|
||||
#endif
|
||||
|
||||
|
||||
@ -19,7 +19,8 @@ struct MPSHooks : public at::MPSHooksInterface {
|
||||
bool isOnMacOSorNewer(unsigned major, unsigned minor) const override;
|
||||
|
||||
// MPSGeneratorImpl interface
|
||||
const Generator& getDefaultMPSGenerator() const override;
|
||||
const Generator& getDefaultGenerator(
|
||||
DeviceIndex device_index = -1) const override;
|
||||
|
||||
// MPSStream interface
|
||||
void deviceSynchronize() const override;
|
||||
|
||||
@ -59,7 +59,7 @@ Allocator* MPSHooks::getMPSDeviceAllocator() const {
|
||||
return at::mps::GetMPSAllocator();
|
||||
}
|
||||
|
||||
const Generator& MPSHooks::getDefaultMPSGenerator() const {
|
||||
const Generator& MPSHooks::getDefaultGenerator([[maybe_unused]] DeviceIndex device_index) const {
|
||||
return at::mps::detail::getDefaultMPSGenerator();
|
||||
}
|
||||
|
||||
|
||||
@ -189,7 +189,7 @@ void MPSProfiler::initialize() {
|
||||
currentSigint.sa_flags = SA_RESTART;
|
||||
sigfillset(¤tSigint.sa_mask);
|
||||
if (sigaction(SIGINT, ¤tSigint, &previousSigint) == -1) {
|
||||
AT_ERROR("Cannot install SIGINT handler for MPSProfiler.");
|
||||
TORCH_CHECK(false, "Cannot install SIGINT handler for MPSProfiler.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -207,7 +207,7 @@ void MPSProfiler::StartTrace(const std::string& mode, bool waitUntilCompleted) {
|
||||
} else if (token == "event") {
|
||||
m_profile_options |= ProfileOptions::ALL_SIGNPOST_EVENTS;
|
||||
} else {
|
||||
AT_ERROR("Invalid Signpost trace mode: ", token);
|
||||
TORCH_CHECK(false, "Invalid Signpost trace mode: ", token);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -654,7 +654,7 @@ bool MPSProfiler::isProfileInfoLoggingEnabled(BaseInfo::Type infoType, bool isEx
|
||||
isInfoLoggingEnabled = (m_log_options & LogOptions::CPU_FALLBACK_INFO);
|
||||
break;
|
||||
default:
|
||||
AT_ERROR("invalid profiling info type");
|
||||
TORCH_CHECK(false, "invalid profiling info type");
|
||||
}
|
||||
if (!isInfoLoggingEnabled) {
|
||||
return false;
|
||||
@ -685,7 +685,7 @@ void MPSProfiler::emitSignpostEvent(SignpostTypes signpost_type,
|
||||
os_signpost_event_emit(m_os_log_events, signpost_id, kEvtSignpostCPUFallbacksStr, "%s", msg);
|
||||
break;
|
||||
default:
|
||||
AT_ERROR("unknown SignpostType in MPS profiler");
|
||||
TORCH_CHECK(false, "unknown SignpostType in MPS profiler");
|
||||
}
|
||||
}
|
||||
|
||||
@ -709,7 +709,7 @@ void MPSProfiler::beginSignpostInterval(SignpostTypes signpost_type,
|
||||
os_signpost_interval_begin(m_os_log_intervals, signpost_id, kIntSignpostCPUFallbacksStr, "%s", msg);
|
||||
break;
|
||||
default:
|
||||
AT_ERROR("unknown SignpostType in MPS profiler");
|
||||
TORCH_CHECK(false, "unknown SignpostType in MPS profiler");
|
||||
}
|
||||
}
|
||||
|
||||
@ -728,7 +728,7 @@ void MPSProfiler::endSignpostInterval(SignpostTypes signpost_type, os_signpost_i
|
||||
os_signpost_interval_end(m_os_log_intervals, signpost_id, kIntSignpostCPUFallbacksStr);
|
||||
break;
|
||||
default:
|
||||
AT_ERROR("unknown SignpostType in MPS profiler");
|
||||
TORCH_CHECK(false, "unknown SignpostType in MPS profiler");
|
||||
}
|
||||
}
|
||||
|
||||
@ -750,7 +750,7 @@ MPSProfiler::SignpostTypes MPSProfiler::getSignpostType(BaseInfo::Type infoType)
|
||||
case BaseInfo::Type::CPU_FALLBACK:
|
||||
return SignpostTypes::CPU_FALLBACK;
|
||||
default:
|
||||
AT_ERROR("invalid profiling info type");
|
||||
TORCH_CHECK(false, "invalid profiling info type");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1624,7 +1624,7 @@ Tensor inverse(const Tensor& A) {
|
||||
template<typename scalar_t>
|
||||
static void apply_cholesky_solve(Tensor& b, Tensor& A, bool upper, Tensor& infos) {
|
||||
#if !AT_BUILD_WITH_LAPACK()
|
||||
AT_ERROR("cholesky_solve: LAPACK library not found in compilation");
|
||||
TORCH_CHECK(false, "cholesky_solve: LAPACK library not found in compilation");
|
||||
#else
|
||||
char uplo = upper ? 'U' : 'L';
|
||||
|
||||
|
||||
@ -168,7 +168,7 @@ static void check_args(CheckedFrom c, IntArrayRef args, size_t expected_size, co
|
||||
ss << arg_name << " should be greater than zero but got (";
|
||||
std::copy(args.begin(), args.end() - 1, std::ostream_iterator<int>(ss,", "));
|
||||
ss << args.back() << ")" << " (while checking arguments for " << c << ")";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -719,7 +719,7 @@ static void check_shape_forward(const at::Tensor& input,
|
||||
separator = " x ";
|
||||
}
|
||||
|
||||
AT_ERROR("Calculated padded input size per channel: (", input_ss.str(), "). "
|
||||
TORCH_CHECK(false, "Calculated padded input size per channel: (", input_ss.str(), "). "
|
||||
"Kernel size: (", kernel_ss.str(), "). Kernel size can't be greater than actual input size");
|
||||
}
|
||||
} else { // transposed
|
||||
@ -1304,7 +1304,7 @@ ConvBackend _select_conv_backend(
|
||||
}
|
||||
|
||||
// Error out if no suitable backend was found.
|
||||
AT_ERROR("unsupported ConvNd parameters");
|
||||
TORCH_CHECK(false, "unsupported ConvNd parameters");
|
||||
}
|
||||
|
||||
// Selects a backend for convolution based on the inputs and params.
|
||||
|
||||
@ -262,7 +262,7 @@ void* DispatchStubImpl::get_call_ptr(
|
||||
false, "DispatchStub: missing kernel for ", device_type);
|
||||
return nullptr;
|
||||
case ErrorType::DeviceNotSupported:
|
||||
AT_ERROR("DispatchStub: unsupported device type", device_type);
|
||||
TORCH_CHECK(false, "DispatchStub: unsupported device type", device_type);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -81,7 +81,7 @@ Tensor embedding_sparse_backward(
|
||||
|
||||
// TODO: implement scale_grad_by_freq
|
||||
if (scale_grad_by_freq) {
|
||||
AT_ERROR(
|
||||
TORCH_CHECK(false,
|
||||
"embedding_backward: scale_grad_by_freq not supported with sparse gradients");
|
||||
}
|
||||
|
||||
|
||||
@ -104,7 +104,7 @@ Tensor& fill_diagonal_(Tensor& self, const Scalar& fill_value, bool wrap) {
|
||||
int64_t dim1 = height;
|
||||
for (const auto i : c10::irange(1, nDims)) {
|
||||
if (self.size(i) != dim1) {
|
||||
AT_ERROR("all dimensions of input must be of equal length");
|
||||
TORCH_CHECK(false, "all dimensions of input must be of equal length");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -269,7 +269,7 @@ inline double _get_epsilon(const ScalarType& sc_type) {
|
||||
case at::ScalarType::Double:
|
||||
return std::numeric_limits<double>::epsilon();
|
||||
default:
|
||||
AT_ERROR("This function doesn't handle types other than float and double");
|
||||
TORCH_CHECK(false, "This function doesn't handle types other than float and double");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -136,7 +136,7 @@ static void max_unpooling3d_shape_check(
|
||||
if (gradOutput.defined()) {
|
||||
if (oT != gradOutput.size(dimt) || oH != gradOutput.size(dimh) ||
|
||||
oW != gradOutput.size(dimw)) {
|
||||
AT_ERROR(
|
||||
TORCH_CHECK(false,
|
||||
"Inconsistent gradOutput size. oT= ",
|
||||
oT,
|
||||
", oH= ",
|
||||
|
||||
@ -85,7 +85,7 @@ static inline void slow_conv_transpose2d_shape_check(
|
||||
check_dim_size(bias, 1, 0, weight.size(1));
|
||||
}
|
||||
} else if (!weight_nullable) {
|
||||
AT_ERROR("weight tensor is expected to be non-nullable");
|
||||
TORCH_CHECK(false, "weight tensor is expected to be non-nullable");
|
||||
}
|
||||
|
||||
int ndim = input.dim();
|
||||
@ -112,7 +112,7 @@ static inline void slow_conv_transpose2d_shape_check(
|
||||
(dilation_width * (kernel_width - 1) + 1) + output_padding_width;
|
||||
|
||||
if (output_width < 1 || output_height < 1) {
|
||||
AT_ERROR(
|
||||
TORCH_CHECK(false,
|
||||
"Given input size per channel: (",
|
||||
input_height,
|
||||
" x ",
|
||||
|
||||
@ -107,7 +107,7 @@ static inline void slow_conv_transpose3d_shape_check(
|
||||
check_dim_size(bias, 1, 0, weight.size(1));
|
||||
}
|
||||
} else if (!weight_nullable) {
|
||||
AT_ERROR("weight tensor is expected to be non-nullable");
|
||||
TORCH_CHECK(false, "weight tensor is expected to be non-nullable");
|
||||
}
|
||||
|
||||
int ndim = input.dim();
|
||||
@ -142,7 +142,7 @@ static inline void slow_conv_transpose3d_shape_check(
|
||||
output_padding_width;
|
||||
|
||||
if (output_depth < 1 || output_width < 1 || output_height < 1) {
|
||||
AT_ERROR(
|
||||
TORCH_CHECK(false,
|
||||
"Given input size per channel: (",
|
||||
input_depth,
|
||||
" x ",
|
||||
|
||||
@ -573,12 +573,12 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, int64_t> _batch_norm_impl_index(
|
||||
if (running_mean.defined()) {
|
||||
check_dims_match_num_input_features("running_mean", num_features, running_mean.sym_numel());
|
||||
} else if (!training) {
|
||||
AT_ERROR("running_mean must be defined in evaluation mode");
|
||||
TORCH_CHECK(false, "running_mean must be defined in evaluation mode");
|
||||
}
|
||||
if (running_var.defined()) {
|
||||
check_dims_match_num_input_features("running_var", num_features, running_var.sym_numel());
|
||||
} else if (!training) {
|
||||
AT_ERROR("running_var must be defined in evaluation mode");
|
||||
TORCH_CHECK(false, "running_var must be defined in evaluation mode");
|
||||
}
|
||||
if (weight.defined()) {
|
||||
check_dims_match_num_input_features("weight", num_features, weight.sym_numel());
|
||||
|
||||
@ -34,7 +34,7 @@ Tensor one_hot(const Tensor &self, int64_t num_classes) {
|
||||
// but shape inference is not possible.
|
||||
if (self.numel() == 0) {
|
||||
if (num_classes <= 0) {
|
||||
AT_ERROR("Can not infer total number of classes from empty tensor.");
|
||||
TORCH_CHECK(false, "Can not infer total number of classes from empty tensor.");
|
||||
} else {
|
||||
shape.push_back(num_classes);
|
||||
return at::empty(shape, self.options());
|
||||
|
||||
@ -51,7 +51,7 @@ std::tuple<Tensor, Tensor> _pack_padded_sequence(const Tensor& _input, const Ten
|
||||
// NB: enforce_sorted is implemented at a Python level, but the sortedness
|
||||
// check lives here. If enforce_sorted=False then this error should never
|
||||
// get called.
|
||||
AT_ERROR("`lengths` array must be sorted in decreasing order when "
|
||||
TORCH_CHECK(false, "`lengths` array must be sorted in decreasing order when "
|
||||
"`enforce_sorted` is True. You can pass `enforce_sorted=False` "
|
||||
"to pack_padded_sequence and/or pack_sequence to sidestep this "
|
||||
"requirement if you do not need ONNX exportability.");
|
||||
|
||||
@ -83,7 +83,7 @@ Tensor repeat_interleave_symint(
|
||||
repeats.sym_size(0), " and input.size(", dim.value(), ") = ", input.sym_size(dim.value())
|
||||
);
|
||||
} else {
|
||||
AT_ERROR("repeats must be 0-dim or 1-dim tensor");
|
||||
TORCH_CHECK(false, "repeats must be 0-dim or 1-dim tensor");
|
||||
}
|
||||
|
||||
auto ret = input.index_select(
|
||||
|
||||
@ -881,12 +881,12 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const std::optional<int64_t
|
||||
if (!at::isFloatingType(self.scalar_type()) && !at::isComplexType(self.scalar_type())) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected a tensor of floating point or complex values";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (self.dim() > 2 || self.dim() < 1) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected a 1D or 2D tensor";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
Tensor input = self;
|
||||
if (self.dim() == 1) {
|
||||
@ -911,24 +911,24 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const std::optional<int64_t
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected 0 < n_fft < " << len
|
||||
<< ", but got n_fft=" << win_length;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (hop_length <= 0) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected hop_length > 0, but got hop_length=" << hop_length;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (win_length <= 0 || win_length > n_fft) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected 0 < win_length <= n_fft, but got win_length="
|
||||
<< win_length;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (window.defined() && (window.dim() != 1 || window.size(0) != win_length)) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected a 1D window tensor of size equal to win_length="
|
||||
<< win_length << ", but got window with size " << window.sizes();
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
#undef REPR
|
||||
auto window_ = window;
|
||||
@ -1063,17 +1063,17 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const std::optional<int64_
|
||||
if (input.numel() == 0) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": input tensor cannot be empty.";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (input_dim != 3 && input_dim != 4) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected a tensor with 3 or 4 dimensions, but got " << input_dim;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (input.size(-1) != 2) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected the last dimension to be 2 (corresponding to real and imaginary parts), but got " << self.size(-1);
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
|
||||
const bool onesided = onesidedOpt.value_or(fft_size != n_fft);
|
||||
@ -1081,32 +1081,32 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const std::optional<int64_
|
||||
if (n_fft / 2 + 1 != fft_size) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected the frequency dimension (3rd to the last) of the input tensor to match n_fft / 2 + 1 when onesided=True, but got " << fft_size;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
} else {
|
||||
if (n_fft != fft_size) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected the frequency dimension (3rd to the last) of the input tensor to match n_fft when onesided=False, but got " << fft_size;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
if (!(0 < hop_length && hop_length <= win_length)) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected 0 < hop_length <= win_length";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
|
||||
if (!(0 < win_length && win_length <= n_fft)) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": expected 0 < win_length <= n_fft";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
if (window.defined()) {
|
||||
if (window.dim() != 1 || window.size(0) != win_length) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << ": Invalid window shape. window has to be 1D and length of `win_length`";
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1175,7 +1175,7 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const std::optional<int64_
|
||||
if (at::is_scalar_tensor_true(window_envelop_lowest)) {
|
||||
std::ostringstream ss;
|
||||
REPR(ss) << "window overlap add min: " << window_envelop_lowest;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
|
||||
y = (y / window_envelop); // size: (channel, expected_output_signal_len)
|
||||
|
||||
@ -63,7 +63,7 @@ inline int64_t infer_ft_complex_to_real_onesided_size(int64_t complex_size,
|
||||
std::ostringstream ss;
|
||||
ss << "expected real signal size " << expected_size << " is incompatible "
|
||||
<< "with onesided complex frequency size " << complex_size;
|
||||
AT_ERROR(ss.str());
|
||||
TORCH_CHECK(false, ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -26,19 +26,19 @@ Tensor _bincount_cpu_template(
|
||||
const Tensor& weights,
|
||||
int64_t minlength) {
|
||||
if (minlength < 0) {
|
||||
AT_ERROR("minlength should be >= 0");
|
||||
TORCH_CHECK(false, "minlength should be >= 0");
|
||||
}
|
||||
if (self.dim() == 1 && self.numel() == 0) {
|
||||
return at::zeros({minlength}, kLong);
|
||||
}
|
||||
if (self.dim() != 1 || *self.min().data_ptr<input_t>() < 0) {
|
||||
AT_ERROR("bincount only supports 1-d non-negative integral inputs.");
|
||||
TORCH_CHECK(false, "bincount only supports 1-d non-negative integral inputs.");
|
||||
}
|
||||
|
||||
// Ensure max_val < 2 ^ 63 - 1 (9223372036854775807)
|
||||
auto max_val = *self.max().data_ptr<input_t>();
|
||||
if (max_val >= std::numeric_limits<int64_t>::max()) {
|
||||
AT_ERROR(
|
||||
TORCH_CHECK(false,
|
||||
"maximum value of input overflowed, it should be < ",
|
||||
std::numeric_limits<int64_t>::max(),
|
||||
" but got ",
|
||||
@ -48,7 +48,7 @@ Tensor _bincount_cpu_template(
|
||||
|
||||
bool has_weights = weights.defined();
|
||||
if (has_weights && (weights.dim() != 1 || weights.size(0) != self.size(0))) {
|
||||
AT_ERROR("weights should be 1-d and have the same length as input");
|
||||
TORCH_CHECK(false, "weights should be 1-d and have the same length as input");
|
||||
}
|
||||
|
||||
Tensor output;
|
||||
|
||||
@ -588,7 +588,7 @@ Tensor to_dense_backward(const Tensor& grad, const Tensor& input_, std::optional
|
||||
case kMkldnn:
|
||||
return grad.to_mkldnn(input_.scalar_type());
|
||||
default:
|
||||
AT_ERROR("to_dense_backward: Unsupported input layout: ", input_layout);
|
||||
TORCH_CHECK(false, "to_dense_backward: Unsupported input layout: ", input_layout);
|
||||
return Tensor{};
|
||||
}
|
||||
}
|
||||
@ -928,23 +928,23 @@ void _to_sparse_check_arguments(const std::string& funcname, const Tensor& self,
|
||||
|
||||
auto layout_from_valid = layout_from == kStrided || layout_from == kSparse || at::sparse_csr::is_sparse_compressed(layout_from);
|
||||
if (!layout_from_valid) {
|
||||
AT_ERROR(funcname, ": unexpected source layout ", layout_from);
|
||||
TORCH_CHECK(false, funcname, ": unexpected source layout ", layout_from);
|
||||
}
|
||||
|
||||
if (layout_from == kStrided) {
|
||||
if (sparse_dim == 0 && self.dim() > 0) {
|
||||
AT_ERROR(funcname, ": sparse_dim argument must be in >0 when self.dim()>0");
|
||||
TORCH_CHECK(false, funcname, ": sparse_dim argument must be in >0 when self.dim()>0");
|
||||
}
|
||||
if (sparse_dim < 0 || sparse_dim > self.dim()) {
|
||||
AT_ERROR(funcname, ": sparse_dim argument must be in [0,", self.dim(), "] range, but ", sparse_dim, " is given");
|
||||
TORCH_CHECK(false, funcname, ": sparse_dim argument must be in [0,", self.dim(), "] range, but ", sparse_dim, " is given");
|
||||
}
|
||||
} else if (layout_from == kSparse) {
|
||||
if (sparse_dim != self.sparse_dim()) {
|
||||
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", kSparse, " with sparse_dim argument !=self.sparse_dim() is not supported");
|
||||
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", kSparse, " with sparse_dim argument !=self.sparse_dim() is not supported");
|
||||
}
|
||||
} else if (at::sparse_csr::is_sparse_compressed(layout_from)) {
|
||||
if (sparse_dim != 2) {
|
||||
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", kSparse, " with sparse_dim argument !=2 is not supported");
|
||||
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", kSparse, " with sparse_dim argument !=2 is not supported");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -956,40 +956,40 @@ void _to_sparse_check_arguments(const std::string& funcname, const Tensor& self,
|
||||
|
||||
auto layout_from_valid = layout_from == kStrided || layout_from == kSparse || at::sparse_csr::is_sparse_compressed(layout_from);
|
||||
if (!layout_from_valid) {
|
||||
AT_ERROR(funcname, ": unexpected source layout ", layout_from);
|
||||
TORCH_CHECK(false, funcname, ": unexpected source layout ", layout_from);
|
||||
}
|
||||
auto layout_to_valid = layout_to == kStrided || layout_to == kSparse || at::sparse_csr::is_sparse_compressed(layout_to);
|
||||
if (!layout_to_valid) {
|
||||
AT_ERROR(funcname, ": unexpected source layout ", layout_from);
|
||||
TORCH_CHECK(false, funcname, ": unexpected source layout ", layout_from);
}

if (layout_from == kSparse && layout_to != kSparse) {
if (self.sparse_dim() != 2) {
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", layout_to, " for input tensors with sparse_dim()!=2 is not supported");
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", layout_to, " for input tensors with sparse_dim()!=2 is not supported");
}
}

if ((layout_from == kSparseCsr || layout_from == kSparseCsc) &&
(layout_to == kSparseBsr || layout_to == kSparseBsc)) {
if (sparse_csr::numBatchDimensions(self) > 0) {
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", layout_to, " for batched inputs is not supported");
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", layout_to, " for batched inputs is not supported");
}
}

if (blocksize.has_value()) {
if (blocksize.value().size() != 2) {
AT_ERROR(funcname, ": blocksize needs to be a tuple of size 2, but got ", blocksize.value().size());
TORCH_CHECK(false, funcname, ": blocksize needs to be a tuple of size 2, but got ", blocksize.value().size());
}
auto blocksize_to = *blocksize;
if (blocksize_to[0] <= 0 || blocksize_to[1] <= 0) {
AT_ERROR(funcname, ": blocksize needs to be positive, but got ", blocksize_to);
TORCH_CHECK(false, funcname, ": blocksize needs to be positive, but got ", blocksize_to);
}

if (layout_to == kSparseBsr || layout_to == kSparseBsc) {
if (layout_from == kSparseBsr || layout_from == kSparseBsc) {
auto blocksize_from = at::sparse_csr::getBlockSize(self);
if (!(blocksize_to == blocksize_from)) {
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", layout_to, " with blocksize changed from ", blocksize_from, " to ", blocksize_to, " is not supported");
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", layout_to, " with blocksize changed from ", blocksize_from, " to ", blocksize_to, " is not supported");
}
} else {
auto dense_dim = (layout_from == kStrided) ? dense_dim_opt.value_or(0) : self.dense_dim();

@ -997,35 +997,35 @@ void _to_sparse_check_arguments(const std::string& funcname, const Tensor& self,

auto sparse_col_dim = -(dense_dim + 1);
if ((self.size(sparse_row_dim) % blocksize_to[0] != 0) ||
(self.size(sparse_col_dim) % blocksize_to[1] != 0)) {
AT_ERROR(funcname, ": tensor sparse size (", self.size(sparse_row_dim), ",", self.size(sparse_row_dim), ") must be divisible by given blocksize (", blocksize_to[0], ",", blocksize_to[1], ")");
TORCH_CHECK(false, funcname, ": tensor sparse size (", self.size(sparse_row_dim), ",", self.size(sparse_row_dim), ") must be divisible by given blocksize (", blocksize_to[0], ",", blocksize_to[1], ")");
}
}
} else {
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", layout_to, " with blocksize argument given is not supported");
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", layout_to, " with blocksize argument given is not supported");
}
} else {
if ((layout_to == kSparseBsr || layout_to == kSparseBsc) &&
!(layout_from == kSparseBsr && layout_from == kSparseBsc)) {
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", layout_to, " without blocksize argument given is not supported");
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", layout_to, " without blocksize argument given is not supported");
}
}

if (dense_dim_opt.has_value()) {
if (layout_from != kStrided) {
AT_ERROR(funcname, ": conversion from ", layout_from, " to ", layout_to, " with dense_dim argument given is not supported");
TORCH_CHECK(false, funcname, ": conversion from ", layout_from, " to ", layout_to, " with dense_dim argument given is not supported");
}

auto dense_dim = *dense_dim_opt;
if (layout_to == kSparse) {
if (dense_dim == self.dim() && self.dim() > 0) {
AT_ERROR(funcname, ": dense_dim argument must be !=self.dim() when self.dim()>0");
TORCH_CHECK(false, funcname, ": dense_dim argument must be !=self.dim() when self.dim()>0");
}
if (dense_dim < 0 || dense_dim > self.dim()) {
AT_ERROR(funcname, ": dense_dim argument must be in [0,", self.dim(), "] range, but ", dense_dim, " is given");
TORCH_CHECK(false, funcname, ": dense_dim argument must be in [0,", self.dim(), "] range, but ", dense_dim, " is given");
}
} else {
if (dense_dim < 0 || dense_dim > self.dim() - 2) {
AT_ERROR(funcname, ": dense_dim argument must be in [0,", self.dim() - 2, "] range, but ", dense_dim, " is given");
TORCH_CHECK(false, funcname, ": dense_dim argument must be in [0,", self.dim() - 2, "] range, but ", dense_dim, " is given");
}
}
}
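
Every hunk in this file applies the same mechanical substitution: the deprecated AT_ERROR macro becomes an explicit TORCH_CHECK(false, ...). The two are interchangeable because AT_ERROR is itself defined in terms of TORCH_CHECK; the sketch below is a close paraphrase of the definition in c10/util/Exception.h (omitting an MSVC expansion workaround), not a verbatim quote, so the swap merely removes one layer of deprecated indirection:

// Close paraphrase of the AT_ERROR definition in c10/util/Exception.h.
#define AT_ERROR(...) \
  do { \
    TORCH_CHECK(false, ::c10::str(__VA_ARGS__)); \
  } while (false)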

@ -1129,7 +1129,7 @@ Tensor dense_to_sparse_with_mask(const Tensor& self, const Tensor& mask, std::op
break;
}

AT_ERROR("dense_to_sparse_with_mask: ", self.layout(), " to ", layout_to, " conversion not supported");
TORCH_CHECK(false, "dense_to_sparse_with_mask: ", self.layout(), " to ", layout_to, " conversion not supported");
return Tensor{};
}

@ -1181,7 +1181,7 @@ Tensor dense_to_sparse(const Tensor& self, std::optional<c10::Layout> layout, Op
break;
}

AT_ERROR("dense_to_sparse: ", self.layout(), " to ", layout_to, " conversion not supported");
TORCH_CHECK(false, "dense_to_sparse: ", self.layout(), " to ", layout_to, " conversion not supported");
return Tensor{};
}

@ -1440,7 +1440,7 @@ Tensor sparse_compressed_to_sparse_csr(const Tensor& self, std::optional<int64_t
return sparse_compressed_to_flipped(self, std::nullopt, "to_sparse_csr");
}

AT_ERROR("sparse_compressed_to_sparse_csr: expected SparseCsr or SparseCsc layout but got ", self.layout());
TORCH_CHECK(false, "sparse_compressed_to_sparse_csr: expected SparseCsr or SparseCsc layout but got ", self.layout());
return Tensor{};
}

@ -1453,7 +1453,7 @@ Tensor sparse_compressed_to_sparse_csc(const Tensor& self, std::optional<int64_t
return sparse_compressed_to_flipped(self, std::nullopt, "to_sparse_csc");
}

AT_ERROR("sparse_compressed_to_sparse_csc: expected SparseCsr or SparseCsc layout but got ", self.layout());
TORCH_CHECK(false, "sparse_compressed_to_sparse_csc: expected SparseCsr or SparseCsc layout but got ", self.layout());
return Tensor{};
}

@ -1828,7 +1828,7 @@ Tensor sparse_compressed_to_sparse_bsr(const Tensor& self, IntArrayRef blocksize
return self.to_sparse_csr(dense_dim_opt).to_sparse_bsr(blocksize);
}

AT_ERROR("sparse_compressed_to_sparse_bsr: expected SparseCsr, SparseCsc, SparseBsr or SparseBsc layout but got ", self.layout());
TORCH_CHECK(false, "sparse_compressed_to_sparse_bsr: expected SparseCsr, SparseCsc, SparseBsr or SparseBsc layout but got ", self.layout());
return Tensor{};
}

@ -1850,14 +1850,14 @@ Tensor sparse_compressed_to_sparse_bsc(const Tensor& self, IntArrayRef blocksize
return self.to_sparse_csc(dense_dim_opt).to_sparse_bsc(blocksize);
}

AT_ERROR("sparse_compressed_to_sparse_bsc: expected SparseCsr, SparseCsc, SparseBsr or SparseBsc layout but got ", self.layout());
TORCH_CHECK(false, "sparse_compressed_to_sparse_bsc: expected SparseCsr, SparseCsc, SparseBsr or SparseBsc layout but got ", self.layout());
return Tensor{};
}

Tensor sparse_coo_to_sparse(const Tensor& self, const int64_t sparse_dim) {
_to_sparse_check_arguments("sparse_coo_to_sparse", self, sparse_dim);

AT_ERROR("sparse_coo_to_sparse: ", self.layout(), " to ", kSparse, " conversion not supported");
TORCH_CHECK(false, "sparse_coo_to_sparse: ", self.layout(), " to ", kSparse, " conversion not supported");
return Tensor{};
}

@ -1927,7 +1927,7 @@ Tensor sparse_compressed_to_sparse(const Tensor& self, std::optional<c10::Layout
break;
}

AT_ERROR("sparse_compressed_to_sparse: ", self.layout(), " to ", layout_to, " conversion not supported");
TORCH_CHECK(false, "sparse_compressed_to_sparse: ", self.layout(), " to ", layout_to, " conversion not supported");
return Tensor{};
}

@ -1951,7 +1951,7 @@ Tensor sparse_coo_to_sparse(const Tensor& self, std::optional<c10::Layout> layou
break;
}

AT_ERROR("sparse_coo_to_sparse: ", self.layout(), " to ", layout_to, " conversion not supported");
TORCH_CHECK(false, "sparse_coo_to_sparse: ", self.layout(), " to ", layout_to, " conversion not supported");
return Tensor{};
}
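
Each of these conversion fallbacks pairs the always-throwing check with a trailing return Tensor{};. The return is unreachable and exists only to satisfy compilers that do not see through the macro to the throw. A minimal sketch of the stub shape (the function name here is hypothetical, not from this diff):

#include <ATen/core/Tensor.h>
#include <c10/util/Exception.h>

// Hypothetical fallback: TORCH_CHECK(false, ...) always throws c10::Error,
// so the trailing return only silences missing-return warnings.
at::Tensor to_unsupported_layout(const at::Tensor& self) {
  TORCH_CHECK(false, "conversion from ", self.layout(), " is not supported");
  return at::Tensor{};
}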

@ -101,7 +101,7 @@ bool cudnn_is_acceptable(const Tensor& self) {

Tensor & detach_(Tensor & self) {
// this just exists to give us a hook in VariableType and an entry in Declarations.yaml
//AT_ERROR("detach_ is not implemented for Tensor");
//TORCH_CHECK(false, "detach_ is not implemented for Tensor");
return self;
}

@ -83,11 +83,11 @@ void cpu_max_unpool(

if (optional_error_index) {
if constexpr (is_3d) {
AT_ERROR("Found an invalid max index: ", optional_error_index.value(),
TORCH_CHECK(false, "Found an invalid max index: ", optional_error_index.value(),
" (output volumes are of size ", output_depth,
"x", output_height, "x", output_width);
} else {
AT_ERROR("Found an invalid max index: ", optional_error_index.value(),
TORCH_CHECK(false, "Found an invalid max index: ", optional_error_index.value(),
" (output volumes are of size ", output_height,
"x", output_width);
}

@ -151,7 +151,7 @@ void cpu_max_unpool_channels_last(
});

if (optional_error_index) {
AT_ERROR("Found an invalid max index: ", optional_error_index.value(),
TORCH_CHECK(false, "Found an invalid max index: ", optional_error_index.value(),
" (output volumes are of size ", output_height,
"x", output_width, ")");
}

@ -223,12 +223,12 @@ void cpu_max_unpool_backward(

if (optional_error_index) {
if (is_3d) {
AT_ERROR("invalid max index ", optional_error_index.value(),
TORCH_CHECK(false, "invalid max index ", optional_error_index.value(),
", odepth= ", output_depth,
", owidth= ", output_width,
", oheight= ", output_height);
} else {
AT_ERROR("invalid max index ", optional_error_index.value(),
TORCH_CHECK(false, "invalid max index ", optional_error_index.value(),
", owidth= ", output_width,
", oheight= ", output_height);
}
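
Note that these unpooling kernels do not raise from inside the parallelized loop: they record the first bad index in a std::optional and check it once after the loop, presumably to avoid throwing from inside a parallel region. A standalone sketch of that pattern, with a plain loop standing in for the parallel one:

#include <cstdint>
#include <optional>
#include <c10/util/Exception.h>

// Sketch: remember the first invalid index, report it after the loop.
void check_indices(const int64_t* indices, int64_t n, int64_t limit) {
  std::optional<int64_t> optional_error_index;
  for (int64_t i = 0; i < n; ++i) {
    if (indices[i] < 0 || indices[i] >= limit) {
      optional_error_index = indices[i];
      break;
    }
  }
  if (optional_error_index) {
    TORCH_CHECK(false, "Found an invalid max index: ", optional_error_index.value(),
                " (valid range is [0, ", limit, "))");
  }
}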

@ -180,19 +180,10 @@ cuda::blas::GEMMAndBiasActivationEpilogue activation_to_gemm_and_blas_arg(Activa

static bool getDisableAddmmCudaLt() {
static const char* env_value = std::getenv("DISABLE_ADDMM_CUDA_LT");
// When DISABLE_ADDMM_CUDA_LT is unset the default is TRUE on
// AMD architectures otherwise it is FALSE.
#ifdef USE_ROCM
if (env_value != nullptr && strcmp(env_value, "0") == 0) {
return false;
}
return true;
#else
if (env_value != nullptr && strcmp(env_value, "1") == 0) {
return true;
}
return false;
#endif
}

#ifdef USE_ROCM
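
getDisableAddmmCudaLt reads the DISABLE_ADDMM_CUDA_LT environment variable and folds in a per-platform default: per the comment above, the cuBLASLt path is disabled by default on AMD architectures and enabled elsewhere. A minimal standalone sketch of this kind of cached environment toggle (helper and variable names are illustrative, not from this diff):

#include <cstdlib>
#include <cstring>

// Sketch: read a "0"/"1" toggle from the environment, falling back to a default.
static bool env_flag(const char* name, bool default_value) {
  const char* value = std::getenv(name);
  if (value == nullptr) {
    return default_value;
  }
  return std::strcmp(value, "1") == 0;
}

// Cached so the environment is consulted only once per process.
static const bool disable_addmm_lt = env_flag("DISABLE_ADDMM_CUDA_LT", false);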

@ -325,14 +316,6 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
}
self__sizes = self_->sizes();
} else {
#if defined(USE_ROCM)
useLtInterface = !disable_addmm_cuda_lt &&
result.dim() == 2 && result.is_contiguous() &&
isSupportedHipLtROCmArch(self.device().index()) &&
(scalar_type == at::ScalarType::Float ||
scalar_type == at::ScalarType::Half ||
scalar_type == at::ScalarType::BFloat16);
#endif
self_ = c10::MaybeOwned<Tensor>::borrowed(self);
self__sizes = self_->sizes();
TORCH_CHECK(result.dim() == 2, "tensors must be 2-D");

@ -66,7 +66,7 @@ static inline void CUFFT_CHECK(cufftResult error)
if (error != CUFFT_SUCCESS) {
std::ostringstream ss;
ss << "cuFFT error: " << _cudaGetErrorEnum(error);
AT_ERROR(ss.str());
TORCH_CHECK(false, ss.str());
}
}
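
CUFFT_CHECK formats its message through a std::ostringstream before handing it to the check macro. Since TORCH_CHECK already concatenates any number of message arguments, the stream is not strictly necessary; a minimal sketch of the flattened form, with stand-ins for cufftResult and _cudaGetErrorEnum:

#include <c10/util/Exception.h>

// Stand-in status type and name helper for illustration only.
enum class FftStatus { Success, AllocFailed };
inline const char* fft_status_name(FftStatus s) {
  return s == FftStatus::Success ? "CUFFT_SUCCESS" : "CUFFT_ALLOC_FAILED";
}

// Sketch: let TORCH_CHECK assemble the message itself.
static inline void FFT_CHECK(FftStatus status) {
  TORCH_CHECK(status == FftStatus::Success, "cuFFT error: ", fft_status_name(status));
}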

@ -462,7 +462,7 @@ Tensor _embedding_bag_dense_backward_cuda(const Tensor &grad_, const Tensor &ind
padding_idx);

default:
AT_ERROR(
TORCH_CHECK(false,
"Unknown mode for embedding_bag_backward_cuda ", mode);
}
}

@ -45,10 +45,10 @@ __device__ inline int get_interval(accscalar_t sample,

template <typename scalar_t>
__global__ void fractional_max_pool2d_out_cuda_frame(
PackedTensorAccessor<scalar_t, 4> output,
PackedTensorAccessor<int64_t, 4> indices,
PackedTensorAccessor<const scalar_t, 4> input,
PackedTensorAccessor<const scalar_t, 3> samples,
GenericPackedTensorAccessor<scalar_t, 4> output,
GenericPackedTensorAccessor<int64_t, 4> indices,
GenericPackedTensorAccessor<const scalar_t, 4> input,
GenericPackedTensorAccessor<const scalar_t, 3> samples,
int poolSizeH, int poolSizeW) {

using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/true>;

@ -102,9 +102,9 @@ __global__ void fractional_max_pool2d_out_cuda_frame(

template <typename scalar_t>
__global__ void fractional_max_pool2d_backward_out_cuda_frame(
PackedTensorAccessor<scalar_t, 4> gradInput,
PackedTensorAccessor<const scalar_t, 4> gradOutput,
PackedTensorAccessor<const int64_t, 4> indices) {
GenericPackedTensorAccessor<scalar_t, 4> gradInput,
GenericPackedTensorAccessor<const scalar_t, 4> gradOutput,
GenericPackedTensorAccessor<const int64_t, 4> indices) {
// Output (h, w) point that this thread is responsible for
int ourOutputPoint = threadIdx.x + blockIdx.x * blockDim.x;
int plane = blockIdx.y;
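
These kernel signatures replace the deprecated PackedTensorAccessor spelling with GenericPackedTensorAccessor, the underlying template whose pointer-traits and index-type parameters default to DefaultPtrTraits and int64_t, so GenericPackedTensorAccessor<scalar_t, 4> names the same type the old alias produced. On the host side such accessors typically come from the packed_accessor64 API; a minimal sketch, assuming a float tensor of rank 4:

#include <ATen/ATen.h>

// Sketch: packed_accessor64<T, N>() yields a
// GenericPackedTensorAccessor<T, N, at::DefaultPtrTraits, int64_t>,
// which can be passed by value into a CUDA kernel.
void pass_accessor(const at::Tensor& t) {
  auto acc = t.packed_accessor64<float, 4>();  // dtype/rank are checked when created
  (void)acc.size(0);
}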

@ -8,6 +8,7 @@
// ROCm 6.3 is planned to have these functions, but until then here they are.
#if defined(USE_ROCM) && ROCM_VERSION >= 60201
#include <hip/hip_fp16.h>
#include <hip/hip_bf16.h>

__device__ inline __hip_bfloat162 preview_unsafeAtomicAdd(__hip_bfloat162* address, __hip_bfloat162 value) {
#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \

@ -267,7 +267,7 @@ static void max_unpooling3d_shape_check(
if (gradOutput.defined()) {
if (oT != gradOutput.size(dimt) || oH != gradOutput.size(dimh) ||
oW != gradOutput.size(dimw)) {
AT_ERROR(
TORCH_CHECK(false,
"Inconsistent gradOutput size. oT= ",
oT,
", oH= ",

@ -447,7 +447,7 @@ at::Tensor& max_unpooling2d_backward_out_cuda(const Tensor& grad_output_,
nInputRows = self.size(dimh);

if (oheight != grad_output.size(dimh) || owidth != grad_output.size(dimw)) {
AT_ERROR(
TORCH_CHECK(false,
"Inconsistent gradOutput size. output height: ",
oheight,
", output width= ",

@ -164,7 +164,7 @@ mixed_dtypes_linear_dispatch_bias_activation(
ElementInputB,
fastertransformer::EpilogueOpNoBias>(input, weight, scale, bias);
}
AT_ERROR("mixed_dtypes_linear_dispatch_bias_activation: Activation \"",
TORCH_CHECK(false, "mixed_dtypes_linear_dispatch_bias_activation: Activation \"",
activation, "\" is not supported");
return Tensor{};
}

@ -185,7 +185,7 @@ mixed_dtypes_linear_dispatch_bias_activation(
ElementInputB,
fastertransformer::EpilogueOpBiasSilu>(input, weight, scale, bias);
}
AT_ERROR("mixed_dtypes_linear_dispatch_bias_activation: Activation \"",
TORCH_CHECK(false, "mixed_dtypes_linear_dispatch_bias_activation: Activation \"",
activation, "\" is not supported");
return Tensor{};
}

@ -198,7 +198,7 @@ _mixed_dtypes_linear(const Tensor& input, const Tensor& weight,
const std::optional<Tensor>& bias_opt,
const std::optional<c10::string_view> activation_opt) {
#if defined(USE_ROCM) || defined(_MSC_VER) || (defined(CUDA_VERSION) && CUDA_VERSION < 11080)
AT_ERROR("_mixed_dtypes_linear: not compiled for this platform");
TORCH_CHECK(false, "_mixed_dtypes_linear: not compiled for this platform");
return Tensor{};
#else
const auto bias = bias_opt.has_value() ? *bias_opt : Tensor{};

@ -88,7 +88,7 @@ static inline void slow_conv_transpose2d_shape_check(
check_dim_size(bias, 1, 0, weight.size(1));
}
} else if (!weight_nullable) {
AT_ERROR("weight tensor is expected to be non-nullable");
TORCH_CHECK(false, "weight tensor is expected to be non-nullable");
}

int ndim = input.dim();

@ -115,7 +115,7 @@ static inline void slow_conv_transpose2d_shape_check(
(dilation_width * (kernel_width - 1) + 1) + output_padding_width;

if (output_width < 1 || output_height < 1) {
AT_ERROR(
TORCH_CHECK(false,
"Given input size per channel: (",
input_height,
" x ",

@ -106,7 +106,7 @@ static inline void slow_conv_transpose3d_shape_check(
check_dim_size(bias, 1, 0, weight.size(1));
}
} else if (!weight_nullable) {
AT_ERROR("weight tensor is expected to be non-nullable");
TORCH_CHECK(false, "weight tensor is expected to be non-nullable");
}

int ndim = input.dim();

@ -140,7 +140,7 @@ static inline void slow_conv_transpose3d_shape_check(
(dilation_width * (kernel_width - 1) + 1) + output_padding_width;

if (output_depth < 1 || output_width < 1 || output_height < 1) {
AT_ERROR(
TORCH_CHECK(false,
"Given input size per channel: (",
input_depth,
" x ",

@ -184,7 +184,7 @@ struct KthValueLauncher {
int64_t slice_size) {
dim3 grid;
if (!getGridFromTiles(num_slices, grid)) {
AT_ERROR("slices are too many");
TORCH_CHECK(false, "slices are too many");
}

dim3 block(std::min(

@ -221,7 +221,7 @@ struct MedianLauncher {
int64_t slice_size) {
dim3 grid;
if (!getGridFromTiles(num_slices, grid)) {
AT_ERROR("slices are too many");
TORCH_CHECK(false, "slices are too many");
}

dim3 block(std::min(

@ -12,10 +12,10 @@ namespace at::native {
// sparse, sparse, sparse, dense, real, real -> sparse
Tensor& _sspaddmm_out_only_sparse_cuda(const Tensor& self,
const Tensor& mat1, const Tensor& mat2, const Scalar& beta, const Scalar& alpha, Tensor& result) {
AT_ERROR("tensor.sspaddmm(...) can only be called on sparse tensors");
TORCH_CHECK(false, "tensor.sspaddmm(...) can only be called on sparse tensors");
}
Tensor& _sspaddmm_out_cuda(const Tensor& self,
const Tensor& mat1, const Tensor& mat2, const Scalar& beta, const Scalar& alpha, Tensor& result) {
AT_ERROR("NYI: CUDA sspaddmm is not implemented");
TORCH_CHECK(false, "NYI: CUDA sspaddmm is not implemented");
}
} // namespace at::native

@ -251,7 +251,7 @@ Tensor _bincount_cuda_template(
const Tensor& weights,
int64_t minlength) {
if (minlength < 0) {
AT_ERROR("minlength should be >= 0");
TORCH_CHECK(false, "minlength should be >= 0");
}
if (self.dim() == 1 && self.numel() == 0) {
return at::zeros(

@ -264,12 +264,12 @@ Tensor _bincount_cuda_template(
if (self.dim() != 1 ||
(!std::is_same_v<input_t, uint8_t> &&
*self.min().cpu().const_data_ptr<input_t>() < 0)) {
AT_ERROR("bincount only supports 1-d non-negative integral inputs.");
TORCH_CHECK(false, "bincount only supports 1-d non-negative integral inputs.");
}

bool has_weights = weights.defined();
if (has_weights && (weights.dim() != 1 || weights.size(0) != self.size(0))) {
AT_ERROR("weights should be 1-d and have the same length as input");
TORCH_CHECK(false, "weights should be 1-d and have the same length as input");
}

const int64_t nbins =

@ -312,7 +312,7 @@ Tensor _histc_cuda_template(
at::acc_type<input_t, /*is_cuda=*/true> min,
at::acc_type<input_t, /*is_cuda=*/true> max) {
if (nbins <= 0) {
AT_ERROR("bins must be > 0");
TORCH_CHECK(false, "bins must be > 0");
}
Tensor output = at::zeros(
{nbins},

@ -387,7 +387,7 @@ Tensor _histc_cuda(
const Scalar& min,
const Scalar& max) {
if (self.scalar_type() == ScalarType::Half) {
AT_ERROR("HalfTensor is not supported");
TORCH_CHECK(false, "HalfTensor is not supported");
}
// See Note [Writing Nondeterministic Operations]
// Nondeterministic because of atomicAdd usage

@ -37,8 +37,8 @@ __global__ void upsample_bilinear2d_out_frame(
const accscalar_t rheight,
const accscalar_t rwidth,
const bool align_corners,
const PackedTensorAccessor<const scalar_t, 4> idata,
PackedTensorAccessor<scalar_t, 4> odata) {
const GenericPackedTensorAccessor<const scalar_t, 4> idata,
GenericPackedTensorAccessor<scalar_t, 4> odata) {
int index = threadIdx.x + blockIdx.x * blockDim.x;

const int batchsize = idata.size(0);

@ -1158,7 +1158,7 @@ REGISTER_CUDA_DISPATCH(ldl_solve_stub, &ldl_solve_kernel)
template <typename scalar_t>
static void apply_cholesky_solve(Tensor& b, Tensor& A, bool upper, int64_t& info) {
#if !AT_MAGMA_ENABLED()
AT_ERROR("cholesky_solve: MAGMA library not found in "
TORCH_CHECK(false, "cholesky_solve: MAGMA library not found in "
"compilation. Please rebuild with MAGMA.");
#else
magma_uplo_t uplo = upper ? MagmaUpper : MagmaLower;

@ -1476,7 +1476,7 @@ template <typename scalar_t>
static void apply_lu_factor_looped_magma(const Tensor& input, const Tensor& pivots, const Tensor& infos, bool compute_pivots) {
#if !AT_MAGMA_ENABLED()
// This should never be thrown if the calling functions are correct.
AT_ERROR("linalg.lu_factor: PyTorch was not compiled with MAGMA support.");
TORCH_CHECK(false, "linalg.lu_factor: PyTorch was not compiled with MAGMA support.");
#else
// magmaLu and magmaLuNoPiv require infos and pivots tensor to be on CPU
// the data is later copied back to the appropriate output tensor

@ -1677,7 +1677,7 @@ REGISTER_CUDA_DISPATCH(lu_factor_stub, &lu_factor);
template <typename scalar_t>
static void apply_triangular_solve_batched_magma(const Tensor& A, const Tensor& b, bool left, bool upper, TransposeType transpose, bool unitriangular) {
#if !AT_MAGMA_ENABLED()
AT_ERROR("triangular_solve: MAGMA library not found in "
TORCH_CHECK(false, "triangular_solve: MAGMA library not found in "
"compilation. Please rebuild with MAGMA.");
#else
magma_uplo_t uplo = upper ? MagmaUpper : MagmaLower;

@ -2106,7 +2106,7 @@ static void apply_svd_magma(const Tensor& A,
const Tensor& Vh,
const Tensor& info) {
#if !AT_MAGMA_ENABLED()
AT_ERROR("linalg.svd: MAGMA library not found in "
TORCH_CHECK(false, "linalg.svd: MAGMA library not found in "
"compilation. Please rebuild with MAGMA.");
#else
using value_t = typename c10::scalar_value_type<scalar_t>::type;
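
All four MAGMA code paths share the same compile-time fallback: when AT_MAGMA_ENABLED() is false, the function body reduces to an unconditional failure so the error surfaces at the call site in a build without MAGMA. A minimal sketch of the pattern, with a stand-in feature macro in place of the real build flag:

#include <c10/util/Exception.h>

#define HAVE_SOLVER_BACKEND() 0  // stand-in for AT_MAGMA_ENABLED()

static void apply_factorization() {
#if !HAVE_SOLVER_BACKEND()
  // Compiled-out path: keep the symbol defined, fail loudly if reached.
  TORCH_CHECK(false,
      "factorization: backend library not found in compilation. "
      "Please rebuild with the backend.");
#else
  // ... real implementation using the backend ...
#endif
}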

@ -59,7 +59,7 @@ struct MAGMAQueue {
static inline magma_int_t magma_int_cast(int64_t value, const char* varname) {
auto result = static_cast<magma_int_t>(value);
if (static_cast<int64_t>(result) != value) {
AT_ERROR("magma: The value of ", varname, "(", (long long)value,
TORCH_CHECK(false, "magma: The value of ", varname, "(", (long long)value,
") is too large to fit into a magma_int_t (", sizeof(magma_int_t), " bytes)");
}
return result;
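
magma_int_cast guards the int64_t to magma_int_t narrowing by casting and round-tripping: if converting back does not reproduce the input, the value did not fit. The same idiom, self-contained with int32_t standing in for magma_int_t:

#include <cstdint>
#include <c10/util/Exception.h>

// Sketch: checked narrowing from int64_t to int32_t.
static inline int32_t checked_narrow(int64_t value, const char* varname) {
  auto result = static_cast<int32_t>(value);
  TORCH_CHECK(static_cast<int64_t>(result) == value,
              "The value of ", varname, " (", value,
              ") is too large to fit into int32_t (", sizeof(int32_t), " bytes)");
  return result;
}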

@ -25,7 +25,8 @@ Tensor cudnn_affine_grid_generator_forward(
int64_t C,
int64_t H,
int64_t W) {
AT_ERROR(
TORCH_CHECK(
false,
"cudnn_affine_grid_generator_forward: ATen not compiled with cuDNN support");
}

@ -35,7 +36,8 @@ Tensor cudnn_affine_grid_generator_backward(
int64_t C,
int64_t H,
int64_t W) {
AT_ERROR(
TORCH_CHECK(
false,
"cudnn_affine_grid_generator_backward: ATen not compiled with cuDNN support");
}
Some files were not shown because too many files have changed in this diff