[nativert] triton runtime implementation (#161798)

Summary: att (as the title)

Test Plan: ci

Rollback Plan:

Reviewed By: minjang

Differential Revision: D80828148

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161798
Approved by: https://github.com/minjang, https://github.com/SherlockNoMad
2025-10-20 21:14:14 +08:00 · 2025-09-04 19:00:11 +00:00
parent 1f51056bd6
commit 3dde5d7f9b
12 changed files with 578 additions and 1 deletions
--- a/build_variables.bzl
+++ b/build_variables.bzl
@@ -635,6 +635,12 @@ libtorch_nativert_sources = [
    "torch/nativert/graph/passes/pass_manager/GraphPasses.cpp",
    "torch/nativert/graph/passes/pass_manager/PassManager.cpp",
    "torch/nativert/kernels/KernelHandlerRegistry.cpp",
+    "torch/nativert/kernels/TritonKernel.cpp",
+    "torch/nativert/executor/triton/CpuTritonKernelManager.cpp",
+]
+
+libtorch_nativert_cuda_sources = [
+    "torch/nativert/executor/triton/CudaTritonKernelManager.cpp",
 ]

 torch_mobile_tracer_sources = [
@@ -770,7 +776,7 @@ libtorch_cuda_distributed_sources = libtorch_cuda_distributed_base_sources + lib

 libtorch_cuda_sources = libtorch_cuda_core_sources + libtorch_cuda_distributed_sources + [
    "torch/csrc/cuda/nccl.cpp",
-]
+] + libtorch_nativert_cuda_sources

 torch_cpp_srcs = [
    "torch/csrc/api/src/cuda.cpp",  # this just forwards stuff, no real CUDA