Shard RegisterDispatchKey (#144364)

Should fix https://github.com/pytorch/pytorch/issues/143952 .

Testing: built PyTorch on a Raspberry Pi 5; this seemed to alleviate the high peak memory requirement. (I also increased the shard counts for other generated files along the way, but I need to go back and figure out how much of that was strictly necessary versus simply needing to build with -j1 or -j2.)

Differential Revision: [D67925496](https://our.internmc.facebook.com/intern/diff/D67925496/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/144364
Approved by: https://github.com/Skylion007, https://github.com/bdhirsh
ghstack dependencies: #144363
This commit is contained in:
Scott Wolchok
2025-01-09 15:00:21 -08:00
committed by PyTorch MergeBot
parent 4143312e67
commit b46d00c1b7
5 changed files with 129 additions and 79 deletions

View File

@@ -202,31 +202,34 @@ GENERATED_H_CUDA = [
]
GENERATED_CPP_CUDA = [
"RegisterCUDA.cpp",
"RegisterNestedTensorCUDA.cpp",
"RegisterSparseCUDA.cpp",
"RegisterSparseCsrCUDA.cpp",
"RegisterQuantizedCUDA.cpp",
"RegisterCUDA_0.cpp",
"RegisterNestedTensorCUDA_0.cpp",
"RegisterSparseCUDA_0.cpp",
"RegisterSparseCsrCUDA_0.cpp",
"RegisterQuantizedCUDA_0.cpp",
]
GENERATED_CPP = [
"Functions.cpp",
"RegisterBackendSelect.cpp",
"RegisterCPU.cpp",
"RegisterQuantizedCPU.cpp",
"RegisterNestedTensorCPU.cpp",
"RegisterSparseCPU.cpp",
"RegisterSparseCsrCPU.cpp",
"RegisterMkldnnCPU.cpp",
"RegisterCompositeImplicitAutograd.cpp",
"RegisterCompositeImplicitAutogradNestedTensor.cpp",
"RegisterZeroTensor.cpp",
"RegisterMeta.cpp",
"RegisterQuantizedMeta.cpp",
"RegisterNestedTensorMeta.cpp",
"RegisterSparseMeta.cpp",
"RegisterCompositeExplicitAutograd.cpp",
"RegisterCompositeExplicitAutogradNonFunctional.cpp",
"RegisterCPU_0.cpp",
"RegisterCPU_1.cpp",
"RegisterCPU_2.cpp",
"RegisterCPU_3.cpp",
"RegisterQuantizedCPU_0.cpp",
"RegisterNestedTensorCPU_0.cpp",
"RegisterSparseCPU_0.cpp",
"RegisterSparseCsrCPU_0.cpp",
"RegisterMkldnnCPU_0.cpp",
"RegisterCompositeImplicitAutograd_0.cpp",
"RegisterCompositeImplicitAutogradNestedTensor_0.cpp",
"RegisterZeroTensor_0.cpp",
"RegisterMeta_0.cpp",
"RegisterQuantizedMeta_0.cpp",
"RegisterNestedTensorMeta_0.cpp",
"RegisterSparseMeta_0.cpp",
"RegisterCompositeExplicitAutograd_0.cpp",
"RegisterCompositeExplicitAutogradNonFunctional_0.cpp",
"CompositeViewCopyKernels.cpp",
"RegisterSchema.cpp",
"RegisterFunctionalization_0.cpp",