diff --git a/build_variables.bzl b/build_variables.bzl
index b028a9b28c02..def6eac76e54 100644
--- a/build_variables.bzl
+++ b/build_variables.bzl
@@ -464,7 +464,7 @@ lazy_tensor_core_python_sources = [
 ]
 
 inductor_core_resources = [
-    "torch/csrc/inductor/aoti_model_container_runner.cpp",
+    "torch/csrc/inductor/aoti_runner/model_container_runner.cpp",
     "torch/csrc/inductor/aoti_torch/shim_common.cpp",
     "torch/csrc/inductor/aoti_torch/tensor_converter.cpp",
     "torch/csrc/inductor/inductor_ops.cpp",
@@ -652,7 +652,7 @@ libtorch_cuda_core_sources = [
     "torch/csrc/CudaIPCTypes.cpp",
     "torch/csrc/cuda/comm.cpp",
     "torch/csrc/cuda/memory_snapshot.cpp",
-    "torch/csrc/inductor/aoti_model_container_runner_cuda.cpp",
+    "torch/csrc/inductor/aoti_runner/model_container_runner_cuda.cpp",
     "torch/csrc/inductor/aoti_torch/shim_cuda.cpp",
     "torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp",
     "torch/csrc/profiler/stubs/cuda.cpp",
diff --git a/docs/source/torch.compiler_aot_inductor.rst b/docs/source/torch.compiler_aot_inductor.rst
index 59ca0b981385..efc395c18cf1 100644
--- a/docs/source/torch.compiler_aot_inductor.rst
+++ b/docs/source/torch.compiler_aot_inductor.rst
@@ -90,8 +90,8 @@ previous step, enabling us to conduct model predictions directly within a C++ en
 The following code snippet assumes your system has a CUDA-enabled device and your model was
 compiled to run on CUDA as shown previously. In the absence of a GPU, it's necessary to make
 these adjustments in order to run it on a CPU:
-1. Modify ``aoti_model_container_runner_cuda.h`` to ``aoti_model_container_runner.h``
-2. Change ``AOTIModelContainerRunnerCuda`` to ``AOTIModelContainerRunner``
+1. Change ``model_container_runner_cuda.h`` to ``model_container_runner_cpu.h``
+2. Change ``AOTIModelContainerRunnerCuda`` to ``AOTIModelContainerRunnerCpu``
 3. Change ``at::kCUDA`` to ``at::kCPU``
 
 .. code-block:: cpp
@@ -100,7 +100,7 @@ previous step, enabling us to conduct model predictions directly within a C++ en
     #include <iostream>
     #include <vector>
 
-    #include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+    #include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 
     int main() {
         c10::InferenceMode mode;
diff --git a/setup.py b/setup.py
index 86cad767c120..3ca6691309c7 100644
--- a/setup.py
+++ b/setup.py
@@ -1217,6 +1217,7 @@ def main():
                 "include/torch/csrc/distributed/autograd/rpc_messages/*.h",
                 "include/torch/csrc/dynamo/*.h",
                 "include/torch/csrc/inductor/*.h",
+                "include/torch/csrc/inductor/aoti_runner/*.h",
                 "include/torch/csrc/inductor/aoti_runtime/*.h",
                 "include/torch/csrc/inductor/aoti_torch/*.h",
                 "include/torch/csrc/inductor/aoti_torch/c/*.h",
diff --git a/test/cpp/aot_inductor/aoti_custom_class.cpp b/test/cpp/aot_inductor/aoti_custom_class.cpp
index 04444767cf54..22dc889fe2f6 100644
--- a/test/cpp/aot_inductor/aoti_custom_class.cpp
+++ b/test/cpp/aot_inductor/aoti_custom_class.cpp
@@ -1,8 +1,8 @@
 #include <torch/script.h>
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
 #ifdef USE_CUDA
-#include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 #endif
 
 #include "aoti_custom_class.h"
diff --git a/test/cpp/aot_inductor/test.cpp b/test/cpp/aot_inductor/test.cpp
index 4bb1fb81c9da..d6ed98a25bc8 100644
--- a/test/cpp/aot_inductor/test.cpp
+++ b/test/cpp/aot_inductor/test.cpp
@@ -3,9 +3,9 @@
 #include <string>
 #include <vector>
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
 #ifdef USE_CUDA
-#include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 #endif
 #include <torch/script.h>
 #include <torch/torch.h>
diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py
index 54633723f74b..fd760433ca22 100644
--- a/torch/_inductor/utils.py
+++ b/torch/_inductor/utils.py
@@ -1195,7 +1195,7 @@ class Placeholder(enum.Enum):
 def aot_inductor_launcher(so_path: str, device: str):
     if device == "cuda":
         return f"""
-            #include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+            #include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 
             torch::inductor::AOTIModelContainerRunnerCuda runner("{so_path}");
 
@@ -1209,7 +1209,7 @@ def aot_inductor_launcher(so_path: str, device: str):
         """
     elif device == "cpu":
         return f"""
-            #include <torch/csrc/inductor/aoti_model_container_runner.h>
+            #include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
 
             torch::inductor::AOTIModelContainerRunnerCpu runner("{so_path}");
 
diff --git a/torch/csrc/inductor/aoti_model_container_runner.cpp b/torch/csrc/inductor/aoti_runner/model_container_runner.cpp
similarity index 98%
rename from torch/csrc/inductor/aoti_model_container_runner.cpp
rename to torch/csrc/inductor/aoti_runner/model_container_runner.cpp
index 30478257e10b..afab1ee78de2 100644
--- a/torch/csrc/inductor/aoti_model_container_runner.cpp
+++ b/torch/csrc/inductor/aoti_runner/model_container_runner.cpp
@@ -1,7 +1,7 @@
 #if !defined(C10_MOBILE) && !defined(ANDROID)
 #include <ATen/DynamicLibrary.h>
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
 #include <torch/csrc/inductor/aoti_torch/tensor_converter.h>
 
 namespace torch::inductor {
diff --git a/torch/csrc/inductor/aoti_model_container_runner.h b/torch/csrc/inductor/aoti_runner/model_container_runner.h
similarity index 79%
rename from torch/csrc/inductor/aoti_model_container_runner.h
rename to torch/csrc/inductor/aoti_runner/model_container_runner.h
index c68c6341aefd..321cbd714677 100644
--- a/torch/csrc/inductor/aoti_model_container_runner.h
+++ b/torch/csrc/inductor/aoti_runner/model_container_runner.h
@@ -60,22 +60,5 @@ class TORCH_API AOTIModelContainerRunner {
   AOTInductorModelContainerHandle container_handle_ = nullptr;
 };
 
-class TORCH_API AOTIModelContainerRunnerCpu : public AOTIModelContainerRunner {
- public:
-  AOTIModelContainerRunnerCpu(const char* model_path, size_t num_models = 1)
-      : AOTIModelContainerRunner(model_path, num_models, true, nullptr) {}
-
-  std::vector<at::Tensor> run(
-      std::vector<at::Tensor> inputs,
-      AOTIProxyExecutorHandle proxy_executor_handle = nullptr) {
-    return AOTIModelContainerRunner::run(
-        inputs, nullptr, proxy_executor_handle);
-  }
-
-  std::vector<std::string> get_call_spec() {
-    return AOTIModelContainerRunner::get_call_spec();
-  }
-};
-
 } // namespace torch::inductor
 #endif
diff --git a/torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h b/torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h
new file mode 100644
index 000000000000..02a221230c52
--- /dev/null
+++ b/torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
+
+namespace torch::inductor {
+class TORCH_API AOTIModelContainerRunnerCpu : public AOTIModelContainerRunner {
+ public:
+  AOTIModelContainerRunnerCpu(const char* model_path, size_t num_models = 1)
+      : AOTIModelContainerRunner(model_path, num_models, true, nullptr) {}
+
+  std::vector<at::Tensor> run(
+      std::vector<at::Tensor> inputs,
+      AOTIProxyExecutorHandle proxy_executor_handle = nullptr) {
+    return AOTIModelContainerRunner::run(
+        inputs, nullptr, proxy_executor_handle);
+  }
+
+  std::vector<std::string> get_call_spec() {
+    return AOTIModelContainerRunner::get_call_spec();
+  }
+};
+
+} // namespace torch::inductor
diff --git a/torch/csrc/inductor/aoti_model_container_runner_cuda.cpp b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.cpp
similarity index 88%
rename from torch/csrc/inductor/aoti_model_container_runner_cuda.cpp
rename to torch/csrc/inductor/aoti_runner/model_container_runner_cuda.cpp
index 84d104cef13c..9b330446e2ba 100644
--- a/torch/csrc/inductor/aoti_model_container_runner_cuda.cpp
+++ b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.cpp
@@ -1,5 +1,5 @@
 #include <c10/cuda/CUDAStream.h>
-#include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 
 namespace torch::inductor {
 
diff --git a/torch/csrc/inductor/aoti_model_container_runner_cuda.h b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h
similarity index 89%
rename from torch/csrc/inductor/aoti_model_container_runner_cuda.h
rename to torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h
index bf63464b87e6..09a39eba8f75 100644
--- a/torch/csrc/inductor/aoti_model_container_runner_cuda.h
+++ b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
 
 namespace torch::inductor {