[AOTI][refactor] Organize model runner files (#116022)

Summary: Move the AOTI model runner utility files into a new torch/csrc/inductor/aoti_runner/ subdirectory and split AOTIModelContainerRunnerCpu into its own header file

Differential Revision: [D52300693](https://our.internmc.facebook.com/intern/diff/D52300693)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/116022
Approved by: https://github.com/khabinov
Bin Bao authored on 2023-12-19 06:11:01 -08:00, committed by PyTorch MergeBot
parent 4d6a1ad400
commit fabf9433e7

11 changed files with 38 additions and 31 deletions
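
For downstream C++ code the practical effect of the move is a pure include-path change; a minimal before/after sketch for the CPU runner (paths taken from the hunks below):

// Before this commit: one header carried both the base and the CPU runner.
// #include <torch/csrc/inductor/aoti_model_container_runner.h>

// After this commit: runner headers live under aoti_runner/, and the CPU
// runner has a dedicated header.
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>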

View File

@@ -464,7 +464,7 @@ lazy_tensor_core_python_sources = [
 ]
 
 inductor_core_resources = [
-    "torch/csrc/inductor/aoti_model_container_runner.cpp",
+    "torch/csrc/inductor/aoti_runner/model_container_runner.cpp",
     "torch/csrc/inductor/aoti_torch/shim_common.cpp",
     "torch/csrc/inductor/aoti_torch/tensor_converter.cpp",
     "torch/csrc/inductor/inductor_ops.cpp",
@@ -652,7 +652,7 @@ libtorch_cuda_core_sources = [
     "torch/csrc/CudaIPCTypes.cpp",
     "torch/csrc/cuda/comm.cpp",
     "torch/csrc/cuda/memory_snapshot.cpp",
-    "torch/csrc/inductor/aoti_model_container_runner_cuda.cpp",
+    "torch/csrc/inductor/aoti_runner/model_container_runner_cuda.cpp",
     "torch/csrc/inductor/aoti_torch/shim_cuda.cpp",
     "torch/csrc/jit/codegen/fuser/cuda/fused_kernel.cpp",
     "torch/csrc/profiler/stubs/cuda.cpp",

View File

@@ -90,8 +90,8 @@ previous step, enabling us to conduct model predictions directly within a C++ environment.
 The following code snippet assumes your system has a CUDA-enabled device and your model was
 compiled to run on CUDA as shown previously.
 In the absence of a GPU, it's necessary to make these adjustments in order to run it on a CPU:
-1. Modify ``aoti_model_container_runner_cuda.h`` to ``aoti_model_container_runner.h``
-2. Change ``AOTIModelContainerRunnerCuda`` to ``AOTIModelContainerRunner``
+1. Change ``model_container_runner_cuda.h`` to ``model_container_runner_cpu.h``
+2. Change ``AOTIModelContainerRunnerCuda`` to ``AOTIModelContainerRunnerCpu``
 3. Change ``at::kCUDA`` to ``at::kCPU``
 
 .. code-block:: cpp
@@ -100,7 +100,7 @@ previous step, enabling us to conduct model predictions directly within a C++ environment.
     #include <vector>
     #include <torch/torch.h>
-    #include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+    #include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 
     int main() {
         c10::InferenceMode mode;
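
Applying the three CPU adjustments listed in this hunk, the documented snippet would read roughly as follows (a sketch, not part of the diff; the model path and input shape are illustrative):

#include <iostream>
#include <vector>

#include <torch/torch.h>
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>

int main() {
    c10::InferenceMode mode;

    // "model.so" is a placeholder for the AOTInductor-compiled shared library.
    torch::inductor::AOTIModelContainerRunnerCpu runner("model.so");
    std::vector<torch::Tensor> inputs = {torch::randn({8, 10}, at::kCPU)};
    std::vector<torch::Tensor> outputs = runner.run(inputs);
    std::cout << "Result from the first inference: " << outputs[0] << std::endl;
    return 0;
}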

View File

@@ -1217,6 +1217,7 @@ def main():
                 "include/torch/csrc/distributed/autograd/rpc_messages/*.h",
                 "include/torch/csrc/dynamo/*.h",
                 "include/torch/csrc/inductor/*.h",
+                "include/torch/csrc/inductor/aoti_runner/*.h",
                 "include/torch/csrc/inductor/aoti_runtime/*.h",
                 "include/torch/csrc/inductor/aoti_torch/*.h",
                 "include/torch/csrc/inductor/aoti_torch/c/*.h",

View File

@@ -1,8 +1,8 @@
 #include <stdexcept>
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
 #ifdef USE_CUDA
-#include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 #endif
 
 #include "aoti_custom_class.h"

View File

@@ -3,9 +3,9 @@
 #include <string>
 #include <vector>
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
 #ifdef USE_CUDA
-#include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 #endif
 #include <torch/script.h>
 #include <torch/torch.h>

View File

@@ -1195,7 +1195,7 @@ class Placeholder(enum.Enum):
 def aot_inductor_launcher(so_path: str, device: str):
     if device == "cuda":
         return f"""
-            #include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+            #include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 
             torch::inductor::AOTIModelContainerRunnerCuda runner("{so_path}");
@@ -1209,7 +1209,7 @@ def aot_inductor_launcher(so_path: str, device: str):
         """
     elif device == "cpu":
         return f"""
-            #include <torch/csrc/inductor/aoti_model_container_runner.h>
+            #include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
 
             torch::inductor::AOTIModelContainerRunnerCpu runner("{so_path}");

View File

@@ -1,7 +1,7 @@
 #if !defined(C10_MOBILE) && !defined(ANDROID)
 #include <ATen/DynamicLibrary.h>
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
 #include <torch/csrc/inductor/aoti_torch/tensor_converter.h>
 
 namespace torch::inductor {

View File

@@ -60,22 +60,5 @@ class TORCH_API AOTIModelContainerRunner {
   AOTInductorModelContainerHandle container_handle_ = nullptr;
 };
-
-class TORCH_API AOTIModelContainerRunnerCpu : public AOTIModelContainerRunner {
- public:
-  AOTIModelContainerRunnerCpu(const char* model_path, size_t num_models = 1)
-      : AOTIModelContainerRunner(model_path, num_models, true, nullptr) {}
-
-  std::vector<at::Tensor> run(
-      std::vector<at::Tensor> inputs,
-      AOTIProxyExecutorHandle proxy_executor_handle = nullptr) {
-    return AOTIModelContainerRunner::run(
-        inputs, nullptr, proxy_executor_handle);
-  }
-
-  std::vector<const char*> get_call_spec() {
-    return AOTIModelContainerRunner::get_call_spec();
-  }
-};
 
 } // namespace torch::inductor
 
 #endif

View File

@@ -0,0 +1,23 @@
+#pragma once
+
+#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
+
+namespace torch::inductor {
+
+class TORCH_API AOTIModelContainerRunnerCpu : public AOTIModelContainerRunner {
+ public:
+  AOTIModelContainerRunnerCpu(const char* model_path, size_t num_models = 1)
+      : AOTIModelContainerRunner(model_path, num_models, true, nullptr) {}
+
+  std::vector<at::Tensor> run(
+      std::vector<at::Tensor> inputs,
+      AOTIProxyExecutorHandle proxy_executor_handle = nullptr) {
+    return AOTIModelContainerRunner::run(
+        inputs, nullptr, proxy_executor_handle);
+  }
+
+  std::vector<const char*> get_call_spec() {
+    return AOTIModelContainerRunner::get_call_spec();
+  }
+};
+
+} // namespace torch::inductor
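
Note how run() pins the base class's extra handle argument to nullptr; judging by the CUDA variant, that slot carries a stream handle, which has no CPU counterpart. A minimal call-site sketch against this new header (the model path is a placeholder, and the get_call_spec() description is my reading of the API, not from the diff):

#include <iostream>
#include <torch/torch.h>
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>

int main() {
  // "model.so" is hypothetical; num_models presumably sizes the container's
  // pool of model instances available to concurrent callers.
  torch::inductor::AOTIModelContainerRunnerCpu runner("model.so", /*num_models=*/2);

  // get_call_spec() appears to return the call-spec strings recorded by the
  // compiled model (its input/output structure).
  for (const char* spec : runner.get_call_spec()) {
    std::cout << spec << std::endl;
  }

  auto outputs = runner.run({torch::ones({2, 3})});
  return 0;
}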

View File

@@ -1,5 +1,5 @@
 #include <c10/cuda/CUDAStream.h>
-#include <torch/csrc/inductor/aoti_model_container_runner_cuda.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h>
 
 namespace torch::inductor {

View File

@@ -1,6 +1,6 @@
 #pragma once
 
-#include <torch/csrc/inductor/aoti_model_container_runner.h>
+#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
 
 namespace torch::inductor {