Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Add option to get input dtype from user (#68751)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/68751

Add an option for the user to specify input dtypes for AOT compilation.

Test Plan: The BI model compiles and runs fine:

```
(pytorch) ~/fbsource/fbcode/caffe2/fb/nnc
└─ $ buck run //caffe2/binaries:aot_model_compiler -- --model=bi.pt --model_name=pytorch_dev_bytedoc --model_version=v1 '--input_dims=1,115;1' --input_types='int64;int64'
Building... 8.3 sec (99%) 7673/7674 jobs, 0/7674 updated
WARNING: Logging before InitGoogleLogging() is written to STDERR
W1116 14:32:44.632536 1332111 TensorImpl.h:1418] Warning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (function operator())
E1116 14:32:44.673710 1332111 huge_pages_allocator.cc:287] Not using huge pages because not linked with jemalloc
The compiled llvm assembly code was saved to bi.compiled.ll
The compiled model was saved to bi.compiled.pt
```

An error is thrown when the number of input dims and input types don't match:

```
(pytorch) ~/fbsource/fbcode/caffe2/fb/nnc
└─ $ buck run //caffe2/binaries:aot_model_compiler -- --model=bi.pt --model_name=pytorch_dev_bytedoc --model_version=v1 '--input_dims=1,115;1' --input_types='int64;int64;int64'
.
.
terminate called after throwing an instance of 'c10::Error'
  what(): [enforce fail at aot_model_compiler.cc:208] split(';', FLAGS_input_dims).size() == split(';', FLAGS_input_types).size(). Number of input_dims and input_types should be the same
.
.
.
```

Reviewed By: ljk53

Differential Revision: D32477001

fbshipit-source-id: 8977b0b59cf78b3a2fec0c8428f83a16ad8685c5
Committed by: Facebook GitHub Bot
Parent: ac1fe91dc9
Commit: 8cc9ec2f6b
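Before the diff itself, a note on the flag grammar the commit relies on: `--input_dims` takes comma-separated dimensions per tensor, with semicolons separating tensors, and `--input_types` must pair one dtype with each dims entry. Below is a minimal standalone sketch of that pairing; the `split` helper here is a stand-in for the tool's own `split`, which this page only shows through the enforce message.

```
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split `str` on `delim`, mirroring the semicolon/comma flag grammar.
std::vector<std::string> split(char delim, const std::string& str) {
  std::vector<std::string> parts;
  std::stringstream ss(str);
  std::string item;
  while (std::getline(ss, item, delim)) {
    parts.push_back(item);
  }
  return parts;
}

int main() {
  // Equivalent of --input_dims='1,115;1' --input_types='int64;int64'.
  const std::string input_dims = "1,115;1";
  const std::string input_types = "int64;int64";

  auto dims = split(';', input_dims);   // one entry per input tensor
  auto types = split(';', input_types); // must pair 1:1 with dims

  if (dims.size() != types.size()) {
    std::cerr << "Number of input_dims and input_types should be the same\n";
    return 1;
  }
  for (size_t i = 0; i < dims.size(); ++i) {
    std::vector<int64_t> shape;
    for (const auto& d : split(',', dims[i])) {
      shape.push_back(std::stoll(d));
    }
    std::cout << "input " << i << ": dtype=" << types[i]
              << " rank=" << shape.size() << "\n";
  }
  return 0;
}
```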
binaries/aot_model_compiler.cc

@@ -1,6 +1,8 @@
 #include <sstream>
 #include <string>
 
+#include <ATen/core/jit_type.h>
+#include <c10/core/ScalarType.h>
 #include <torch/csrc/jit/backends/backend.h>
 #include <torch/csrc/jit/backends/backend_detail.h>
 #include <torch/csrc/jit/backends/backend_preprocess.h>
@@ -25,9 +27,16 @@ C10_DEFINE_string(model_version, "", "The version of the model.");
 C10_DEFINE_string(
     input_dims,
     "",
-    "For input float TensorCPUs, specify the dimension using comma "
-    "separated numbers. If multiple inputs needed, use semicolon "
-    "to separate the dimension of different tensors.");
+    "The dimensions of input TensorCPUs using comma separated numbers."
+    "If multiple inputs needed, use semicolon to separate "
+    "the dimension of different tensors.");
+C10_DEFINE_string(
+    input_types,
+    "float",
+    "The dtype of input TensorCPUs."
+    "If multiple inputs needed, use semicolon to separate "
+    "the dtype of different tensors."
+    "Supported dtypes: float, int64, uint8");
 C10_DEFINE_string(method_name, "forward", "The name of the method.");
 C10_DEFINE_string(
     output_llvm,
@@ -68,18 +77,39 @@ std::vector<std::vector<int64_t>> parseInputShapes() {
   return inputs;
 }
 
+std::vector<at::ScalarType> parseInputTypes() {
+  std::vector<std::string> inputTypes = split(';', FLAGS_input_types);
+  std::vector<at::ScalarType> scalarTypes;
+  for (const auto& inputType : inputTypes) {
+    at::ScalarType scalarType;
+    if (inputType == "float") {
+      scalarType = at::ScalarType::Float;
+    } else if (inputType == "uint8") {
+      scalarType = at::ScalarType::Byte;
+    } else if (inputType == "int64") {
+      scalarType = at::ScalarType::Long;
+    } else {
+      CAFFE_THROW("Unsupported input type: ", inputType);
+    }
+    scalarTypes.push_back(scalarType);
+  }
+  return scalarTypes;
+}
+
 c10::Dict<c10::IValue, c10::IValue> createCompileSpec() {
   c10::Dict<c10::IValue, c10::IValue> compile_spec(
       c10::StringType::get(), c10::AnyType::get());
   c10::Dict<c10::IValue, c10::IValue> method_spec(
       c10::StringType::get(), c10::AnyType::get());
-  auto input_shapes = parseInputShapes();
-  method_spec.insert("sizes", input_shapes);
+  auto inputShapes = parseInputShapes();
+  auto inputTypes = parseInputTypes();
+  method_spec.insert("sizes", inputShapes);
+  method_spec.insert("types", inputTypes);
   compile_spec.insert(FLAGS_method_name, method_spec);
   return compile_spec;
 }
 
-std::vector<std::vector<int64_t>> getInputSizes (
+std::vector<std::vector<int64_t>> getInputSizes(
     const c10::Dict<c10::IValue, c10::IValue>& compile_spec) {
   auto input_shapes = compile_spec.at(FLAGS_method_name).toGenericDict().at("sizes").toList();
   std::vector<std::vector<int64_t>> inputSizes;
@@ -90,6 +120,17 @@ std::vector<std::vector<int64_t>> getInputSizes (
   return inputSizes;
 }
 
+std::vector<at::ScalarType> getInputTypes(
+    const c10::Dict<c10::IValue, c10::IValue>& compile_spec) {
+  auto inputTypesList = compile_spec.at(FLAGS_method_name).toGenericDict().at("types").toList();
+  std::vector<at::ScalarType> inputTypes;
+  for (const auto& inputType : inputTypesList) {
+    auto type = ((c10::IValue) inputType).toScalarType();
+    inputTypes.emplace_back(type);
+  }
+  return inputTypes;
+}
+
 std::string getNncKernelId() {
   // TODO: calculate the version_token.
   const std::string version_token = "VERTOKEN";
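Aside: the compile spec is a generic `c10::Dict`, so the ScalarTypes inserted by `createCompileSpec()` come back as IValues, which is why `getInputTypes()` casts each list element before calling `toScalarType()`. A minimal sketch of that round trip, assuming `IValue`'s ScalarType constructor and accessor behave as in current ATen:

```
#include <ATen/core/ivalue.h>
#include <c10/core/ScalarType.h>
#include <iostream>

int main() {
  // Box a ScalarType into a generic IValue, as the method_spec dict does...
  c10::IValue boxed(at::ScalarType::Long);
  // ...and unbox it the way getInputTypes() does for each list element.
  at::ScalarType unboxed = boxed.toScalarType();
  std::cout << (unboxed == at::ScalarType::Long) << "\n"; // prints 1
  return 0;
}
```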
@@ -122,10 +163,11 @@ c10::IValue preprocess(
   auto method = mod.get_method(FLAGS_method_name);
   auto graph = toGraphFunction(method.function()).graph()->copy();
   auto sizes = getInputSizes(compile_spec);
+  auto types = getInputTypes(compile_spec);
   auto kernel_func_name = getNncKernelFuncName(FLAGS_method_name);
 
   auto compiled = torch::jit::mobile::nnc::aotCompile(
-      FLAGS_method_name, graph, sizes, kernel_func_name);
+      FLAGS_method_name, graph, sizes, types, kernel_func_name);
   writeOutputLlvmAssembly(compiled.second);
 
   auto func = std::move(compiled.first);
@@ -148,6 +190,7 @@ int main(int argc, char** argv) {
       " --model_name=<model name>"
       " --model_version=<model version>"
       " --input_dims=<input dimensions like '1,3,224,224;2,2'>"
+      " --input_types=<input dtypes like 'float;float'>"
       " [--method_name=<method name>]"
       " [--output_llvm=<llvm assembly output file path>]"
       " [--output_model=<output model file path>]");
@@ -162,6 +205,8 @@ int main(int argc, char** argv) {
   CAFFE_ENFORCE(!FLAGS_model_name.empty(), c10::UsageMessage());
   CAFFE_ENFORCE(!FLAGS_model_version.empty(), c10::UsageMessage());
   CAFFE_ENFORCE(!FLAGS_input_dims.empty(), c10::UsageMessage());
+  CAFFE_ENFORCE(split(';', FLAGS_input_dims).size() == split(';', FLAGS_input_types).size(),
+      "Number of input_dims and input_types should be the same");
 
   std::string output_model_name = FLAGS_output_model;
   if (output_model_name.empty()) {
@@ -173,11 +218,12 @@ int main(int argc, char** argv) {
   m.eval();
   auto frozen_m = torch::jit::freeze_module(m.clone());
   auto graph = frozen_m.get_method(FLAGS_method_name).graph();
-  auto input_shapes = parseInputShapes();
+  auto inputShapes = parseInputShapes();
+  auto inputTypes = parseInputTypes();
   std::vector<c10::optional<at::Tensor>> example_inputs;
-  example_inputs.reserve(input_shapes.size());
-  for (const auto& input_shape : input_shapes) {
-    example_inputs.emplace_back(at::rand(input_shape));
+  example_inputs.reserve(inputShapes.size());
+  for (int i = 0; i < inputShapes.size(); ++i) {
+    example_inputs.emplace_back(at::rand(inputShapes[i]).to(at::dtype(inputTypes[i])));
   }
 
   torch::jit::RemoveTensorMutation(graph);
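One behavioral note on the hunk above: `at::rand` draws floats in [0, 1), so casting to an integer dtype truncates every value to zero. That is harmless here because the example inputs only drive shape and dtype propagation through the compiler, but it is worth knowing. A small sketch of the effect, assuming standard ATen semantics:

```
#include <ATen/ATen.h>
#include <iostream>

int main() {
  // rand() samples [0, 1) as float; casting to Long truncates every value to 0.
  auto t = at::rand({1, 115}).to(at::dtype(at::ScalarType::Long));
  std::cout << t.dtype() << " sum=" << t.sum().item<int64_t>() << "\n";
  // expected output: long sum=0
  return 0;
}
```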
torch/csrc/jit/mobile/nnc/aot_compiler.cpp

@@ -37,13 +37,13 @@ std::vector<int64_t> getConstSizes(const BufPtr b) {
 }
 
 std::vector<mobile::nnc::InputSpec> toInputSpecs(
-    const std::vector<std::vector<int64_t>>& inputSizes) {
+    const std::vector<std::vector<int64_t>>& inputSizes,
+    const std::vector<at::ScalarType>& inputTypes) {
   std::vector<mobile::nnc::InputSpec> specs;
-  for (const auto& sizes : inputSizes) {
+  for (int i = 0; i < inputSizes.size(); ++i) {
     mobile::nnc::InputSpec spec;
-    spec.sizes_ = sizes;
-    // TODO: Use user specified input type. For now using Long for BI model
-    spec.dtype_ = c10::ScalarType::Long;
+    spec.sizes_ = inputSizes[i];
+    spec.dtype_ = inputTypes[i];
     specs.emplace_back(std::move(spec));
   }
   return specs;
@@ -52,10 +52,11 @@ std::vector<mobile::nnc::InputSpec> toInputSpecs(
 std::unique_ptr<Function> compileMethod(
     std::shared_ptr<tensorexpr::TensorExprKernel> kernel,
     const std::string& method_name,
-    const std::vector<std::vector<int64_t>>& sizes) {
+    const std::vector<std::vector<int64_t>>& sizes,
+    const std::vector<at::ScalarType>& types) {
   auto func = std::make_unique<Function>();
   func->set_name(method_name);
-  func->set_input_specs(toInputSpecs(sizes));
+  func->set_input_specs(toInputSpecs(sizes, types));
 
   auto params = c10::impl::GenericList(c10::AnyType::get());
   auto const_descriptors = kernel->getConstantDescriptors();
@@ -110,15 +111,21 @@ std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
     const std::string& method_name,
     std::shared_ptr<Graph>& g,
     const std::vector<std::vector<int64_t>>& sizes,
+    const std::vector<at::ScalarType>& types,
     const std::string& kernel_func_name) {
   GRAPH_DEBUG("Input sizes ", sizes);
+  GRAPH_DEBUG("Input types ", types);
   GRAPH_DEBUG("Method name ", method_name);
   GRAPH_DEBUG("Kernel func name ", kernel_func_name);
 
+  CAFFE_ENFORCE(
+      sizes.size() == types.size(),
+      "Number of input sizes and input types should be the same");
+
   std::vector<at::IValue> example_values;
   std::vector<c10::optional<at::Tensor>> example_inputs;
-  for (const auto& size : sizes) {
-    // TODO: Use user specified input type. For now using Long for BI model
-    auto example_input = at::rand(size).to(at::dtype(at::kLong));
+  for (int i = 0; i < sizes.size(); ++i) {
+    auto example_input = at::rand(sizes[i]).to(at::dtype(types[i]));
     example_values.emplace_back(example_input);
     example_inputs.emplace_back(example_input);
   }
@@ -141,7 +148,7 @@ std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
 
   const std::string compiled_assembly = kernel->getCodeText();
 
-  auto func = compileMethod(kernel, method_name, sizes);
+  auto func = compileMethod(kernel, method_name, sizes, types);
   return std::make_pair(std::move(func), compiled_assembly);
 }
torch/csrc/jit/mobile/nnc/aot_compiler.h

@@ -15,6 +15,7 @@ TORCH_API std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
     const std::string& method_name,
     std::shared_ptr<Graph>& subgraph,
     const std::vector<std::vector<int64_t>>& sizes,
+    const std::vector<at::ScalarType>& types,
     const std::string& kernel_func_name = "func");
 
 } // namespace nnc
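Taken together, the new call contract looks roughly like the sketch below. This is illustrative, not a standalone unit: `graph` stands for a lowered TorchScript graph obtained as in `preprocess()` above, and `compileForward` and the kernel name are hypothetical.

```
#include <torch/csrc/jit/mobile/nnc/aot_compiler.h>

// Sketch only: compiles a hypothetical "forward" method taking a {1,115} and
// a {1} int64 tensor, matching the BI model invocation from the test plan.
std::unique_ptr<torch::jit::mobile::nnc::Function> compileForward(
    std::shared_ptr<torch::jit::Graph>& graph) {
  std::vector<std::vector<int64_t>> sizes = {{1, 115}, {1}};
  // Must pair 1:1 with `sizes`; aotCompile() enforces this.
  std::vector<at::ScalarType> types = {
      at::ScalarType::Long, at::ScalarType::Long};
  auto compiled = torch::jit::mobile::nnc::aotCompile(
      "forward", graph, sizes, types, "forward_kernel");
  // compiled.second holds the LLVM assembly text; a caller could write it out
  // the way writeOutputLlvmAssembly() does.
  return std::move(compiled.first);
}
```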