mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
This is a reland of #87603, with the definitions of c10::stoXX kept for further investigation. Pull Request resolved: https://github.com/pytorch/pytorch/pull/109566 Approved by: https://github.com/huydhn
1978 lines
71 KiB
C++
1978 lines
71 KiB
C++
#include "pybind_state.h"
|
|
|
|
#include <chrono>
|
|
#include <future>
|
|
#include <memory>
|
|
|
|
#include <pybind11/pybind11.h>
|
|
#include <pybind11/stl.h>
|
|
|
|
#include <c10/macros/Macros.h>
|
|
|
|
#include "caffe2/core/blob_serialization.h"
|
|
#include "caffe2/core/blob_stats.h"
|
|
#include "caffe2/core/common.h"
|
|
#include "caffe2/core/db.h"
|
|
#include "caffe2/core/numa.h"
|
|
#include "caffe2/core/operator.h"
|
|
#include "caffe2/core/stats.h"
|
|
#include "caffe2/core/transform.h"
|
|
#include "caffe2/observers/profile_observer.h"
|
|
#include "caffe2/observers/runcnt_observer.h"
|
|
#include "caffe2/observers/time_observer.h"
|
|
#include "caffe2/onnx/backend.h"
|
|
#include "caffe2/onnx/helper.h"
|
|
#include "caffe2/onnx/offline_tensor.h"
|
|
#include "caffe2/onnx/onnx_exporter.h"
|
|
#include "caffe2/opt/converter.h"
|
|
#include "caffe2/opt/fakefp16_transform.h"
|
|
#include "caffe2/opt/fusion.h"
|
|
#include "caffe2/opt/mobile.h"
|
|
#include "caffe2/opt/onnxifi_transformer.h"
|
|
#include "caffe2/opt/optimize_ideep.h"
|
|
#include "caffe2/opt/passes.h"
|
|
#include "caffe2/opt/shape_info.h"
|
|
#include "caffe2/predictor/emulator/data_filler.h"
|
|
#include "caffe2/predictor/predictor.h"
|
|
#include "caffe2/proto/caffe2_pb.h"
|
|
#include "caffe2/proto/torch.pb.h"
|
|
#include "caffe2/python/pybind_state_registry.h"
|
|
#include "caffe2/python/pybind_workspace.h"
|
|
#include "caffe2/utils/cpuid.h"
|
|
#include "caffe2/utils/string_utils.h"
|
|
#include "torch/csrc/autograd/variable.h"
|
|
#include "torch/csrc/jit/python/module_python.h"
|
|
|
|
// Because of CMake setup, we can't depend on script module here just yet -
|
|
// it pulls in generated files from a different directory and it
|
|
// probabilistically breaks the build.
|
|
// TODO: enable it once shared libraries are unified in CMake
|
|
#ifdef FBCODE_CAFFE2
|
|
#include "torch/script.h"
|
|
#endif
|
|
|
|
namespace caffe2 {
|
|
namespace python {
|
|
|
|
// A dummy variable to overcome the pybind11 py::arg::operator= ambiguity
|
|
// for some earlier versions of pybind11.
|
|
// Named `false` constant; used further down as the default value of
// py::arg("overwrite") in the Workspace "_create_net" binding.
constexpr bool kPyBindFalse = false;
|
|
|
|
namespace py = pybind11;
|
|
|
|
// NOLINTNEXTLINE(modernize-use-equals-default)
|
|
BlobFeederBase::~BlobFeederBase() {} // out-of-line, empty destructor body
|
|
|
|
// Defines BlobFeederRegistry. Going by the macro arguments, entries are keyed
// by caffe2::DeviceType and hand out BlobFeederBase instances wrapped in
// std::unique_ptr (NOTE(review): exact semantics live in the macro definition).
C10_DEFINE_TYPED_REGISTRY(
    BlobFeederRegistry,
    caffe2::DeviceType,
    BlobFeederBase,
    std::unique_ptr);

// Default Tensor support: TensorFetcher is registered for the Tensor type id,
// and TensorFeeder<CPUContext> is registered as the CPU feeder.
REGISTER_BLOB_FETCHER((TypeMeta::Id<Tensor>()), TensorFetcher);
REGISTER_BLOB_FEEDER(CPU, TensorFeeder<CPUContext>);
|
|
|
|
class StringFetcher : public BlobFetcherBase {
|
|
public:
|
|
py::object Fetch(const Blob& blob) override {
|
|
return py::bytes(blob.Get<string>());
|
|
}
|
|
};
|
|
REGISTER_BLOB_FETCHER((TypeMeta::Id<string>()), StringFetcher);
|
|
|
|
#ifdef FBCODE_CAFFE2
// Fetcher for blobs that hold a std::unique_ptr<torch::jit::Module>: the
// pointed-to module is converted to a Python object with py::cast.
class ScriptModuleFetcher : public BlobFetcherBase {
 public:
  pybind11::object Fetch(const Blob& blob) override {
    const auto& holder = blob.Get<std::unique_ptr<torch::jit::Module>>();
    return py::cast(*holder);
  }
};

// Make ScriptModuleFetcher the fetcher used for the unique_ptr<Module> type id.
REGISTER_BLOB_FETCHER(
    (TypeMeta::Id<std::unique_ptr<torch::jit::Module>>()),
    caffe2::python::ScriptModuleFetcher);
#endif
|
|
|
|
static_assert(
|
|
sizeof(int) == sizeof(int32_t),
|
|
"We make an assumption that int is always int32 for numpy "
|
|
"type mapping.");
|
|
int CaffeToNumpyType(const TypeMeta meta) {
|
|
#ifdef USE_NUMPY
|
|
static std::map<TypeIdentifier, int> numpy_type_map{
|
|
{TypeMeta::Id<bool>(), NPY_BOOL},
|
|
{TypeMeta::Id<double>(), NPY_DOUBLE},
|
|
{TypeMeta::Id<float>(), NPY_FLOAT},
|
|
{TypeMeta::Id<std::complex<double>>(), NPY_COMPLEX128},
|
|
{TypeMeta::Id<std::complex<float>>(), NPY_COMPLEX64},
|
|
{TypeMeta::Id<at::Half>(), NPY_FLOAT16},
|
|
{TypeMeta::Id<int>(), NPY_INT},
|
|
{TypeMeta::Id<int8_t>(), NPY_INT8},
|
|
{TypeMeta::Id<int16_t>(), NPY_INT16},
|
|
{TypeMeta::Id<int64_t>(), NPY_LONGLONG},
|
|
{TypeMeta::Id<uint8_t>(), NPY_UINT8},
|
|
{TypeMeta::Id<uint16_t>(), NPY_UINT16},
|
|
{TypeMeta::Id<std::string>(), NPY_OBJECT},
|
|
// Note: Add more types here.
|
|
};
|
|
const auto it = numpy_type_map.find(meta.id());
|
|
return it == numpy_type_map.end() ? -1 : it->second;
|
|
#else
|
|
CAFFE_THROW("Caffe2 compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
}
|
|
|
|
// Translates a NumPy type number into the matching Caffe2 TypeMeta.
//
// Returns the TypeMeta registered for `numpy_type`, or a default-constructed
// TypeMeta when the number has no entry in the table. When the build lacks
// USE_NUMPY the function raises through CAFFE_THROW instead.
const TypeMeta NumpyTypeToCaffe(int numpy_type) {
#ifdef USE_NUMPY
  // Built once on first use. The entry order is kept as-is on purpose: if two
  // NPY_* constants share a value on some platform, std::map keeps the first
  // one listed.
  static std::map<int, TypeMeta> numpy_to_caffe{
      {NPY_BOOL, TypeMeta::Make<bool>()},
      {NPY_DOUBLE, TypeMeta::Make<double>()},
      {NPY_FLOAT, TypeMeta::Make<float>()},
      {NPY_FLOAT16, TypeMeta::Make<at::Half>()},
      {NPY_INT, TypeMeta::Make<int>()},
      {NPY_INT8, TypeMeta::Make<int8_t>()},
      {NPY_INT16, TypeMeta::Make<int16_t>()},
      {NPY_INT64, TypeMeta::Make<int64_t>()},
      // NPY_LONG follows the width of the platform's `long`.
      {NPY_LONG,
       sizeof(long) == sizeof(int) ? TypeMeta::Make<int>()
                                   : TypeMeta::Make<int64_t>()},
      {NPY_LONGLONG, TypeMeta::Make<int64_t>()},
      {NPY_UINT8, TypeMeta::Make<uint8_t>()},
      {NPY_UINT16, TypeMeta::Make<uint16_t>()},
      {NPY_OBJECT, TypeMeta::Make<std::string>()},
      {NPY_UNICODE, TypeMeta::Make<std::string>()},
      {NPY_STRING, TypeMeta::Make<std::string>()},
      // Note: Add more types here.
  };
  // Fallback returned for NumPy types that have no Caffe2 counterpart.
  static TypeMeta unknown_type;
  const auto entry = numpy_to_caffe.find(numpy_type);
  if (entry == numpy_to_caffe.end()) {
    return unknown_type;
  }
  return entry->second;
#else
  CAFFE_THROW("Caffe2 compiled without NumPy support.");
#endif // USE_NUMPY
}
|
|
|
|
// Builds a callable that forwards a name to `registry->HelpMessage(name)` and
// returns the result. The registry pointer is captured by value, so the
// registry itself must stay alive for as long as the callable is used.
template <typename Registry>
std::function<const char*(const string&)> DefinitionGetter(
    const Registry* registry) {
  auto lookup_help = [registry](const string& name) {
    return registry->HelpMessage(name);
  };
  return lookup_help;
}
|
|
|
|
namespace python_detail {
|
|
// Python Op implementations.
|
|
using FuncRegistry = std::unordered_map<std::string, Func>;
|
|
|
|
// Accessor for the process-wide table of registered Python op functions.
// The table is heap-allocated on first use and deliberately never freed, so
// the objects registered in it are always leaked.
FuncRegistry& gRegistry() {
  static FuncRegistry* const registry = new FuncRegistry();
  return *registry;
}
|
|
|
|
// Looks up the Python function registered under `token`.
//
// Returns a reference to the entry stored in gRegistry(). Raises through
// CAFFE_ENFORCE when no function is registered for `token`.
//
// Uses a single find() rather than count() followed by operator[]: one hash
// lookup instead of two, and no accessor that could insert a default entry.
const Func& getOpFunc(const std::string& token) {
  auto& registry = gRegistry();
  const auto it = registry.find(token);
  CAFFE_ENFORCE(
      it != registry.end(),
      "Python operator for ",
      token,
      " is not available. If you use distributed training it probably means "
      "that python implementation has to be registered in each of the workers");
  return it->second;
}
|
|
|
|
// Looks up the gradient function for `token`, which is registered under the
// name `token` with "_gradient" appended. Fails the same way getOpFunc does
// when that name is not registered.
const Func& getGradientFunc(const std::string& token) {
  const std::string gradient_token = token + "_gradient";
  return getOpFunc(gradient_token);
}
|
|
|
|
// Fetches blob `name` from workspace `ws` as a Python object.
//
// Raises through CAFFE_ENFORCE when the workspace has no such blob. When a
// fetcher exists for the blob's type, its Fetch() result is returned;
// otherwise a bytes object holding a short description (blob name and C++
// type name) is returned.
py::object fetchBlob(Workspace* ws, const std::string& name) {
  CAFFE_ENFORCE(ws->HasBlob(name), "Can't find blob: ", name);
  const caffe2::Blob& blob = *(ws->GetBlob(name));
  auto fetcher = CreateFetcher(blob.meta().id());
  if (!fetcher) {
    // No fetcher is registered for this type: return a metainfo string.
    std::stringstream description;
    description << name << ", a C++ native class of type " << blob.TypeName()
                << ".";
    return py::bytes(description.str());
  }
  return fetcher->Fetch(blob);
}
|
|
|
|
// This function can only return true, but keeping it for backward compatibility
|
|
bool feedBlob(
|
|
Blob* blob,
|
|
const py::object& arg,
|
|
const py::object device_option) {
|
|
DeviceOption option;
|
|
if (!device_option.is_none()) {
|
|
// If we have a device option passed in, read it.
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(
|
|
py::bytes(device_option).cast<std::string>(), &option));
|
|
}
|
|
#ifdef USE_NUMPY
|
|
if (PyArray_Check(arg.ptr())) { // numpy array
|
|
PyArrayObject* array = reinterpret_cast<PyArrayObject*>(arg.ptr());
|
|
auto feeder = CreateFeeder(option.device_type());
|
|
CAFFE_ENFORCE(feeder, "Unknown device type encountered in FeedBlob.");
|
|
feeder->Feed(option, array, blob, true); /* default to inplace feed */
|
|
return true;
|
|
}
|
|
#else
|
|
CAFFE_THROW("Caffe2 compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
if (PyBytes_Check(arg.ptr()) || PyUnicode_Check(arg.ptr())) {
|
|
*blob->GetMutable<std::string>() = arg.cast<std::string>();
|
|
return true;
|
|
}
|
|
#ifdef FBCODE_CAFFE2
|
|
if (auto module = torch::jit::as_module(arg)) {
|
|
blob->GetMutable<std::unique_ptr<torch::jit::Module>>()->reset(
|
|
new torch::jit::Module(*module));
|
|
return true;
|
|
}
|
|
#endif
|
|
CAFFE_THROW(
|
|
"Unexpected type of argument - only numpy array or string are "
|
|
"supported for feeding");
|
|
return false;
|
|
}
|
|
|
|
// Creates a fresh Blob, fills it by passing `content` to DeserializeBlob, and
// returns it by value.
Blob deserializeBlob(const string& content) {
  Blob result;
  DeserializeBlob(content, &result);
  return result;
}
|
|
} // namespace python_detail
|
|
|
|
class GetPythonGradient : public GradientMakerBase {
|
|
public:
|
|
using GradientMakerBase::GradientMakerBase;
|
|
std::vector<OperatorDef> GetGradientDefs() override {
|
|
CAFFE_ENFORCE(Def().type() == "Python" || Def().type() == "PythonDLPack");
|
|
ArgumentHelper helper(Def());
|
|
auto gradOutputIndices =
|
|
helper.GetRepeatedArgument<int>("grad_output_indices");
|
|
auto gradInputIndices =
|
|
helper.GetRepeatedArgument<int>("grad_input_indices");
|
|
std::vector<std::string> gradientInputs;
|
|
for (int i = 0; i < def_.input_size(); ++i) {
|
|
// NOLINTNEXTLINE(performance-inefficient-vector-operation)
|
|
gradientInputs.push_back(I(i));
|
|
}
|
|
for (int i = 0; i < def_.output_size(); ++i) {
|
|
gradientInputs.push_back(O(i));
|
|
}
|
|
if (gradOutputIndices.size() > 0) {
|
|
// NOLINTNEXTLINE(modernize-loop-convert)
|
|
for (unsigned i = 0; i < gradOutputIndices.size(); ++i) {
|
|
int GO_i = gradOutputIndices[i];
|
|
gradientInputs.push_back(GO(GO_i));
|
|
}
|
|
} else {
|
|
for (int i = 0; i < def_.output_size(); ++i) {
|
|
gradientInputs.push_back(GO(i));
|
|
}
|
|
}
|
|
std::vector<std::string> gradientOutputs;
|
|
if (gradInputIndices.size() > 0) {
|
|
// NOLINTNEXTLINE(modernize-loop-convert)
|
|
for (unsigned i = 0; i < gradInputIndices.size(); ++i) {
|
|
int GI_i = gradInputIndices[i];
|
|
gradientOutputs.push_back(GI(GI_i));
|
|
}
|
|
} else {
|
|
for (int i = 0; i < def_.input_size(); ++i) {
|
|
gradientOutputs.push_back(GI(i));
|
|
}
|
|
}
|
|
|
|
std::string grad_op_name = "PythonGradient";
|
|
if (Def().type() == "PythonDLPack") {
|
|
grad_op_name = "PythonDLPackGradient";
|
|
}
|
|
return SingleGradientDef(grad_op_name, "", gradientInputs, gradientOutputs);
|
|
}
|
|
};
|
|
|
|
// "Python" operator and its gradient, instantiated with the second PythonOp /
// PythonGradientOp template argument set to false.
REGISTER_CPU_OPERATOR(Python, PythonOp<CPUContext, false>);
REGISTER_CPU_OPERATOR(PythonGradient, PythonGradientOp<CPUContext, false>);
// Always allow running in-place: the predicate accepts every (input, output)
// index pair.
OPERATOR_SCHEMA(Python).AllowInplace([](int, int) { return true; });
OPERATOR_SCHEMA(PythonGradient).AllowInplace([](int, int) { return true; });
REGISTER_GRADIENT(Python, GetPythonGradient);

// "PythonDLPack" counterpart: same registrations, with the second template
// argument set to true.
REGISTER_CPU_OPERATOR(PythonDLPack, PythonOp<CPUContext, true>);
REGISTER_CPU_OPERATOR(PythonDLPackGradient, PythonGradientOp<CPUContext, true>);
OPERATOR_SCHEMA(PythonDLPack).AllowInplace([](int, int) { return true; });
OPERATOR_SCHEMA(PythonDLPackGradient).AllowInplace([](int, int) {
  return true;
});
REGISTER_GRADIENT(PythonDLPack, GetPythonGradient);
|
|
|
|
class BackgroundPlan {
|
|
public:
|
|
// NOLINTNEXTLINE(modernize-pass-by-value)
|
|
BackgroundPlan(Workspace* ws, PlanDef def) : ws_(ws), def_(def) {}
|
|
|
|
void run() {
|
|
fut_ =
|
|
std::async(std::launch::async, [this]() { return ws_->RunPlan(def_); });
|
|
}
|
|
|
|
bool isDone() {
|
|
CAFFE_ENFORCE(fut_.valid());
|
|
auto status = fut_.wait_for(std::chrono::milliseconds(0));
|
|
return status == std::future_status::ready;
|
|
}
|
|
|
|
bool isSucceeded() {
|
|
CAFFE_ENFORCE(isDone());
|
|
return fut_.get();
|
|
}
|
|
|
|
private:
|
|
Workspace* ws_;
|
|
PlanDef def_;
|
|
|
|
std::future<bool> fut_;
|
|
};
|
|
|
|
void addObjectMethods(py::module& m) {
|
|
py::class_<NetBase>(m, "Net")
|
|
.def(
|
|
"run",
|
|
[](NetBase* net) {
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(net->Run());
|
|
})
|
|
.def("cancel", [](NetBase* net) {
|
|
py::gil_scoped_release g;
|
|
net->Cancel();
|
|
});
|
|
|
|
py::class_<ObserverBase<NetBase>>(m, "Observer")
|
|
.def(
|
|
"average_time",
|
|
[](ObserverBase<NetBase>* ob) {
|
|
auto* cast_ob = dynamic_cast_if_rtti<TimeObserver*>(ob);
|
|
CAFFE_ENFORCE(
|
|
cast_ob, "Observer does not implement this function.");
|
|
return cast_ob->average_time();
|
|
})
|
|
.def(
|
|
"average_time_children",
|
|
[](ObserverBase<NetBase>* ob) {
|
|
auto* cast_ob = dynamic_cast_if_rtti<TimeObserver*>(ob);
|
|
CAFFE_ENFORCE(
|
|
cast_ob, "Observer does not implement this function.");
|
|
return cast_ob->average_time_children();
|
|
})
|
|
.def("debug_info", [](ObserverBase<NetBase>* ob) {
|
|
return ob->debugInfo();
|
|
});
|
|
|
|
py::class_<Blob>(m, "Blob")
|
|
.def(
|
|
"serialize",
|
|
[](const Blob& blob, const std::string& name) -> py::bytes {
|
|
return SerializeBlob(blob, name);
|
|
})
|
|
.def(
|
|
"deserialize",
|
|
[](Blob* blob, py::bytes serialized) {
|
|
DeserializeBlob(serialized, blob);
|
|
})
|
|
.def(
|
|
"fetch",
|
|
[](const Blob& blob) {
|
|
auto fetcher = CreateFetcher(blob.meta().id());
|
|
CAFFE_ENFORCE(
|
|
fetcher,
|
|
"Could not fetch for blob of type: ",
|
|
blob.meta().name());
|
|
return fetcher->Fetch(blob);
|
|
})
|
|
.def("is_tensor", [](Blob* blob) { return blob->IsType<Tensor>(); })
|
|
// return any device Tensor
|
|
.def(
|
|
"as_tensor",
|
|
[](Blob* blob) {
|
|
CAFFE_ENFORCE(
|
|
blob->IsType<Tensor>(),
|
|
"Passed in blob doesn't contain Tensor and instead has ",
|
|
blob->meta());
|
|
return py::cast(&blob->Get<Tensor>());
|
|
},
|
|
py::return_value_policy::reference_internal)
|
|
// legacy API that resets tensor to CPUTensor if it's not already
|
|
.def(
|
|
"tensor",
|
|
[](Blob* blob) { return py::cast(BlobGetMutableTensor(blob, CPU)); },
|
|
py::return_value_policy::reference_internal)
|
|
.def(
|
|
"_feed",
|
|
&python_detail::feedBlob,
|
|
"Feed an input array or string, with the (optional) DeviceOption",
|
|
py::arg("arg"),
|
|
py::arg("device_option") = py::none())
|
|
.def("_wrap_tensor_impl", [](Blob* blob, void* ptr) {
|
|
auto p = c10::intrusive_ptr<c10::TensorImpl, at::UndefinedTensorImpl>::
|
|
unsafe_reclaim_from_nonowning(static_cast<c10::TensorImpl*>(ptr));
|
|
TORCH_CHECK(p.defined(), "Can't wrap undefined tensor");
|
|
TORCH_CHECK(
|
|
!p->requires_grad(), "Can wrap only non-requires-grad tensor");
|
|
auto at_tensor = at::Tensor::wrap_tensor_impl(std::move(p));
|
|
BlobSetTensor(blob, Tensor(std::move(at_tensor)));
|
|
});
|
|
|
|
py::class_<DLPackWrapper<CPUContext>>(m, "DLPackTensorCPU")
|
|
.def_property_readonly(
|
|
"data",
|
|
[](DLPackWrapper<CPUContext>* t) -> py::object {
|
|
CAFFE_ENFORCE_EQ(
|
|
t->device_option.device_type(),
|
|
PROTO_CPU,
|
|
"Expected CPU device option for CPU tensor");
|
|
return t->data();
|
|
},
|
|
"Return DLPack tensor with tensor's data.")
|
|
.def(
|
|
"feed",
|
|
[](DLPackWrapper<CPUContext>* t, py::object obj) {
|
|
CAFFE_ENFORCE_EQ(
|
|
t->device_option.device_type(),
|
|
PROTO_CPU,
|
|
"Expected CPU device option for CPU tensor");
|
|
t->feed(obj);
|
|
},
|
|
"Copy data from given DLPack tensor into this tensor.")
|
|
.def_property_readonly(
|
|
"_shape",
|
|
[](const DLPackWrapper<CPUContext>& t) {
|
|
auto* tensor = t.tensor;
|
|
// TODO: This is marginally less efficient than it could
|
|
// be, since we're doing an extra allocation we didn't
|
|
// need to do. But I don't remember how to clue in
|
|
// pybind11 how to convert ArrayRef to vector.
|
|
return tensor->sizes().vec();
|
|
})
|
|
.def(
|
|
"_reshape",
|
|
[](DLPackWrapper<CPUContext>* t, std::vector<int64_t> dims) {
|
|
auto* tensor = t->tensor;
|
|
tensor->Resize(dims);
|
|
});
|
|
|
|
py::class_<TensorCPU>(m, "TensorCPU")
|
|
.def_property_readonly(
|
|
"data",
|
|
[](TensorCPU* t) -> py::object {
|
|
if (t->dtype() == TypeMeta{}) {
|
|
// keep this behavior for backward compatibility
|
|
t->mutable_data<float>();
|
|
}
|
|
auto res = TensorFetcher().FetchTensor(*t, false);
|
|
return res.obj;
|
|
},
|
|
"Return numpy array pointing to this tensor's data if possible. "
|
|
"Otherwise (e.g. for strings) copies the data (same as fetch).")
|
|
.def(
|
|
"feed",
|
|
[](TensorCPU* t, py::object obj) {
|
|
#ifdef USE_NUMPY
|
|
if (!PyArray_Check(obj.ptr())) {
|
|
CAFFE_THROW(
|
|
"Unexpected type of argument -- expected numpy array");
|
|
}
|
|
*t = TensorFeeder<CPUContext>().FeedTensor(
|
|
DeviceOption{}, reinterpret_cast<PyArrayObject*>(obj.ptr()));
|
|
#else
|
|
CAFFE_THROW("Caffe2 compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
},
|
|
"Copy data from given numpy array into this tensor.")
|
|
.def(
|
|
"fetch",
|
|
[](TensorCPU* t) {
|
|
auto res = TensorFetcher().FetchTensor(*t, true);
|
|
return res.obj;
|
|
},
|
|
"Copy data from this tensor into a new numpy array.")
|
|
.def(
|
|
"init",
|
|
[](Tensor* t, std::vector<int64_t> dims, int caffe_type) {
|
|
const auto& meta =
|
|
DataTypeToTypeMeta((TensorProto::DataType)caffe_type);
|
|
CAFFE_ENFORCE(
|
|
!TensorFetcher().NeedsCopy(t, meta),
|
|
"Cannot init tensor of this type. Use `feed` instead.");
|
|
t->Resize(dims);
|
|
t->raw_mutable_data(meta);
|
|
},
|
|
"Initialize this tensor to given shape and data type. "
|
|
"Fail if the given data type cannot be accessed from python.")
|
|
.def(
|
|
"_tensor_impl_raw_handle",
|
|
[](TensorCPU* t) -> void* {
|
|
// NOLINTNEXTLINE(performance-unnecessary-copy-initialization)
|
|
auto p = t->getIntrusivePtr();
|
|
// We return a raw non-owning pointer here, we rely on surrounding
|
|
// code to keep the original tensor alive
|
|
return p.get();
|
|
})
|
|
.def_property_readonly(
|
|
"_shape", [](const TensorCPU& t) { return t.sizes().vec(); })
|
|
.def("_reshape", [](TensorCPU* t, std::vector<int64_t> dims) {
|
|
t->Resize(dims);
|
|
});
|
|
|
|
py::class_<Workspace>(m, "Workspace")
|
|
.def(py::init<>())
|
|
.def(py::init<Workspace*>())
|
|
.def_property_readonly(
|
|
"nets",
|
|
[](Workspace* self) {
|
|
TORCH_CHECK_NOTNULL(self);
|
|
std::map<std::string, py::object> nets;
|
|
for (const auto& name : self->Nets()) {
|
|
LOG(INFO) << "name: " << name;
|
|
nets[name] = py::cast(self->GetNet(name));
|
|
}
|
|
return nets;
|
|
},
|
|
py::return_value_policy::reference_internal)
|
|
.def_property_readonly(
|
|
"blobs",
|
|
[](Workspace* self) {
|
|
TORCH_CHECK_NOTNULL(self);
|
|
std::map<std::string, py::object> blobs;
|
|
for (const auto& name : self->Blobs()) {
|
|
blobs[name] = py::cast(self->GetBlob(name));
|
|
}
|
|
return blobs;
|
|
},
|
|
py::return_value_policy::reference_internal)
|
|
.def(
|
|
"_create_net",
|
|
[](Workspace* self, py::bytes def, bool overwrite) -> py::object {
|
|
caffe2::NetDef proto;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(def.cast<std::string>(), &proto));
|
|
NetBase* net = self->CreateNet(proto, overwrite);
|
|
CAFFE_ENFORCE(net);
|
|
return py::cast(net);
|
|
},
|
|
py::return_value_policy::reference_internal,
|
|
py::arg("def"),
|
|
py::arg("overwrite") = kPyBindFalse)
|
|
.def(
|
|
"create_blob",
|
|
[](Workspace* self, const std::string& name) -> py::object {
|
|
return py::cast(self->CreateBlob(name));
|
|
},
|
|
py::return_value_policy::reference_internal)
|
|
.def(
|
|
"_remove_blob",
|
|
[](Workspace* self, const std::string& name) -> py::bool_ {
|
|
return self->RemoveBlob(name);
|
|
})
|
|
.def("fetch_blob", &python_detail::fetchBlob)
|
|
.def(
|
|
"has_blob",
|
|
[](Workspace* self, const std::string& name) {
|
|
return self->HasBlob(name);
|
|
})
|
|
.def(
|
|
"_run_net",
|
|
[](Workspace* self, py::bytes def) {
|
|
caffe2::NetDef proto;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(def.cast<std::string>(), &proto));
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(self->RunNetOnce(proto));
|
|
})
|
|
.def(
|
|
"_run_operator",
|
|
[](Workspace* self, py::bytes def) {
|
|
caffe2::OperatorDef proto;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(def.cast<std::string>(), &proto));
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(self->RunOperatorOnce(proto));
|
|
})
|
|
.def(
|
|
"_run_plan",
|
|
[](Workspace* self, py::bytes def) {
|
|
caffe2::PlanDef proto;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(def.cast<std::string>(), &proto));
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(self->RunPlan(proto));
|
|
})
|
|
.def(
|
|
"_last_failed_op_net_position",
|
|
[](Workspace* self) {
|
|
CAFFE_ENFORCE(self);
|
|
return (int)self->last_failed_op_net_position;
|
|
})
|
|
.def_property_readonly_static("current", [](py::object /* type */) {
|
|
auto ws = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(ws);
|
|
return py::cast(ws, py::return_value_policy::reference);
|
|
});
|
|
|
|
py::class_<BackgroundPlan, std::shared_ptr<BackgroundPlan>>(
|
|
m, "BackgroundPlan")
|
|
.def("is_done", &BackgroundPlan::isDone)
|
|
.def("is_succeeded", &BackgroundPlan::isSucceeded);
|
|
|
|
// Gradients
|
|
py::class_<GradientWrapper>(m, "GradientWrapper")
|
|
.def(py::init<>())
|
|
.def_readwrite("dense", &GradientWrapper::dense_)
|
|
.def_readwrite("indices", &GradientWrapper::indices_)
|
|
.def_readwrite("values", &GradientWrapper::values_)
|
|
.def("is_sparse", &GradientWrapper::IsSparse)
|
|
.def("is_dense", &GradientWrapper::IsDense)
|
|
.def("is_empty", &GradientWrapper::IsEmpty);
|
|
|
|
m.def(
|
|
"get_gradient_defs",
|
|
[](py::bytes op_def, std::vector<GradientWrapper> output_gradients) {
|
|
OperatorDef def;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(op_def.cast<std::string>(), &def));
|
|
CAFFE_ENFORCE(caffe2::GradientRegistry()->Has(def.type()));
|
|
const auto& meta = GetGradientForOp(def, output_gradients);
|
|
std::vector<py::bytes> grad_ops;
|
|
for (const auto& op : meta.ops_) {
|
|
// NOLINTNEXTLINE(modernize-use-emplace)
|
|
grad_ops.push_back(
|
|
SerializeAsString_EnforceCheck(op, "addObjectMethods"));
|
|
}
|
|
return std::pair<std::vector<py::bytes>, std::vector<GradientWrapper>>{
|
|
grad_ops, meta.g_input_};
|
|
},
|
|
pybind11::return_value_policy::copy);
|
|
|
|
// DB
|
|
py::class_<db::Transaction>(m, "Transaction")
|
|
.def("put", &db::Transaction::Put)
|
|
.def("commit", &db::Transaction::Commit);
|
|
py::class_<db::Cursor>(m, "Cursor")
|
|
.def("supports_seek", &db::Cursor::SupportsSeek)
|
|
.def("seek_to_first", &db::Cursor::SeekToFirst)
|
|
.def("next", &db::Cursor::Next)
|
|
.def("key", [](db::Cursor* self) -> py::bytes { return self->key(); })
|
|
.def("value", [](db::Cursor* self) -> py::bytes { return self->value(); })
|
|
.def("valid", &db::Cursor::Valid);
|
|
py::enum_<db::Mode>(m, "Mode")
|
|
.value("read", db::Mode::READ)
|
|
.value("write", db::Mode::WRITE)
|
|
.value("new", db::Mode::NEW)
|
|
.export_values();
|
|
py::class_<db::DB /*, std::unique_ptr<DB>*/>(m, "DB")
|
|
.def("new_transaction", &db::DB::NewTransaction)
|
|
.def("new_cursor", &db::DB::NewCursor)
|
|
.def("close", &db::DB::Close);
|
|
m.def("create_db", &db::CreateDB);
|
|
m.def("registered_dbs", []() {
|
|
return caffe2::db::Caffe2DBRegistry()->Keys();
|
|
});
|
|
|
|
// OpSchema
|
|
py::class_<OpSchema> op_schema(m, "OpSchema");
|
|
op_schema.def_property_readonly("file", &OpSchema::file)
|
|
.def_property_readonly("line", &OpSchema::line)
|
|
.def_property_readonly("private", &OpSchema::private_op)
|
|
.def_property_readonly(
|
|
"doc", &OpSchema::doc, py::return_value_policy::reference)
|
|
.def_property_readonly("args", &OpSchema::args)
|
|
.def_property_readonly("input_desc", &OpSchema::input_desc)
|
|
.def_property_readonly("output_desc", &OpSchema::output_desc)
|
|
.def_property_readonly("max_input", &OpSchema::max_input)
|
|
.def_property_readonly("max_output", &OpSchema::max_output)
|
|
.def_property_readonly("min_input", &OpSchema::min_input)
|
|
.def_property_readonly("min_output", &OpSchema::min_output)
|
|
.def_property_readonly("inf", &OpSchema::inf)
|
|
// Note: this does not work yet, we will need to figure out how to pass
|
|
// protobuf objects.
|
|
.def("infer_tensor", &OpSchema::InferTensor)
|
|
.def("CalculateOutput", &OpSchema::CalculateOutput)
|
|
.def("inplace_enforced", &OpSchema::inplace_enforced)
|
|
.def("num_inputs_allowed", &OpSchema::num_inputs_allowed)
|
|
.def("num_outputs_allowed", &OpSchema::num_outputs_allowed)
|
|
.def("num_inputs_outputs_allowed", &OpSchema::num_inputs_outputs_allowed)
|
|
.def_static(
|
|
"get", &OpSchemaRegistry::Schema, py::return_value_policy::reference)
|
|
.def_static(
|
|
"get_cpu_impl",
|
|
DefinitionGetter(CPUOperatorRegistry()),
|
|
py::return_value_policy::reference)
|
|
.def_static(
|
|
"get_cuda_impl",
|
|
DefinitionGetter(CUDAOperatorRegistry()),
|
|
py::return_value_policy::reference)
|
|
.def_static(
|
|
"get_gradient_impl",
|
|
DefinitionGetter(GradientRegistry()),
|
|
py::return_value_policy::reference);
|
|
|
|
py::class_<OpSchema::Argument>(op_schema, "Argument")
|
|
.def_property_readonly("name", &OpSchema::Argument::name)
|
|
.def_property_readonly("description", &OpSchema::Argument::description)
|
|
.def_property_readonly("required", &OpSchema::Argument::is_required);
|
|
|
|
py::class_<caffe2::onnx::Caffe2Ops>(m, "Caffe2Ops")
|
|
.def(py::init([](const std::vector<py::bytes>& init_ops,
|
|
const std::vector<py::bytes>& ops,
|
|
const std::vector<std::string>& interface_blobs) {
|
|
auto* c2ops = new caffe2::onnx::Caffe2Ops();
|
|
for (const auto& s : init_ops) {
|
|
ParseProtoFromLargeString(
|
|
s.cast<std::string>(), c2ops->init_ops.Add());
|
|
}
|
|
for (const auto& s : ops) {
|
|
ParseProtoFromLargeString(s.cast<std::string>(), c2ops->ops.Add());
|
|
}
|
|
for (const auto& s : interface_blobs) {
|
|
auto* tmp = c2ops->interface_blobs.Add();
|
|
*tmp = s;
|
|
}
|
|
return c2ops;
|
|
}));
|
|
|
|
py::class_<caffe2::onnx::DummyName>(m, "DummyName")
|
|
.def(py::init<>())
|
|
.def(
|
|
"reset",
|
|
[](caffe2::onnx::DummyName& instance, const py::object& args) {
|
|
if (args.is_none()) {
|
|
instance.Reset(std::unordered_set<std::string>());
|
|
} else {
|
|
instance.Reset(args.cast<std::unordered_set<std::string>>());
|
|
}
|
|
},
|
|
"Reset the dummy name generator",
|
|
py::arg("args") = py::none())
|
|
.def(
|
|
"new_dummy_name",
|
|
[](caffe2::onnx::DummyName& instance) -> std::string {
|
|
return instance.NewDummyName();
|
|
});
|
|
|
|
py::class_<caffe2::onnx::Caffe2BackendRep>(m, "Caffe2BackenRep")
|
|
.def(py::init<>())
|
|
.def(
|
|
"init_net",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance) {
|
|
const auto& init_net = instance.init_net();
|
|
std::string out;
|
|
init_net.SerializeToString(&out);
|
|
return py::bytes(out);
|
|
})
|
|
|
|
.def(
|
|
"pred_net",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance) {
|
|
const auto& pred_net = instance.pred_net();
|
|
std::string out;
|
|
pred_net.SerializeToString(&out);
|
|
return py::bytes(out);
|
|
})
|
|
.def(
|
|
"external_outputs",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance) {
|
|
std::vector<std::string> outputs;
|
|
for (const auto& o : instance.pred_net().external_output()) {
|
|
outputs.emplace_back(o);
|
|
}
|
|
return outputs;
|
|
})
|
|
.def(
|
|
"external_inputs",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance) {
|
|
std::vector<std::string> inputs;
|
|
for (const auto& o : instance.pred_net().external_input()) {
|
|
inputs.emplace_back(o);
|
|
}
|
|
return inputs;
|
|
})
|
|
.def(
|
|
"uninitialized_inputs",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance) {
|
|
return instance.uninitialized_inputs();
|
|
})
|
|
.def(
|
|
"run",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance,
|
|
std::map<std::string, py::object> inputs)
|
|
-> std::vector<py::object> {
|
|
caffe2::Predictor::TensorMap tensors_data{};
|
|
for (const auto& pair : inputs) {
|
|
const auto& name = pair.first;
|
|
const auto& input = pair.second;
|
|
#ifdef USE_NUMPY
|
|
CAFFE_ENFORCE(
|
|
PyArray_Check(input.ptr()),
|
|
"Input must be of type numpy array.");
|
|
PyArrayObject* array =
|
|
reinterpret_cast<PyArrayObject*>(input.ptr());
|
|
tensors_data.emplace(
|
|
name,
|
|
TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array));
|
|
#else
|
|
CAFFE_THROW("Caffe2 was compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
}
|
|
caffe2::Predictor::TensorList out;
|
|
instance.RunMap(tensors_data, &out);
|
|
std::vector<py::object> pyout;
|
|
for (auto& t : out) {
|
|
pyout.push_back(TensorFetcher().FetchTensor(t, true).obj);
|
|
}
|
|
return pyout;
|
|
})
|
|
.def(
|
|
"run",
|
|
[](caffe2::onnx::Caffe2BackendRep& instance,
|
|
std::vector<py::object> inputs) -> std::vector<py::object> {
|
|
std::vector<TensorCPU> tensors_data;
|
|
#ifdef USE_NUMPY
|
|
// NOLINTNEXTLINE(modernize-loop-convert)
|
|
for (auto i = 0U; i < inputs.size(); ++i) {
|
|
auto input = inputs[i];
|
|
CAFFE_ENFORCE(
|
|
PyArray_Check(input.ptr()),
|
|
"Input must be of type numpy array.");
|
|
PyArrayObject* array =
|
|
reinterpret_cast<PyArrayObject*>(input.ptr());
|
|
tensors_data.push_back(
|
|
TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array));
|
|
}
|
|
#else
|
|
CAFFE_THROW("Caffe2 was compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
std::vector<TensorCPU> out;
|
|
instance.Run(tensors_data, &out);
|
|
std::vector<py::object> pyout;
|
|
for (auto& t : out) {
|
|
// NOLINTNEXTLINE(performance-inefficient-vector-operation)
|
|
pyout.push_back(TensorFetcher().FetchTensor(t, true).obj);
|
|
}
|
|
return pyout;
|
|
});
|
|
|
|
py::class_<caffe2::onnx::Caffe2Backend>(m, "Caffe2Backend")
|
|
.def(py::init<>())
|
|
.def(py::init<caffe2::onnx::DummyName*>())
|
|
.def(
|
|
"support_onnx_import",
|
|
[](caffe2::onnx::Caffe2Backend& instance,
|
|
const std::string& op) -> bool { return instance.SupportOp(op); })
|
|
.def(
|
|
"prepare",
|
|
[](caffe2::onnx::Caffe2Backend& instance,
|
|
const py::bytes& onnx_model_str,
|
|
const std::string& device,
|
|
const std::vector<caffe2::onnx::Caffe2Ops>& extras) {
|
|
auto* rep = instance.Prepare(
|
|
onnx_model_str.cast<std::string>(), device, extras);
|
|
return rep;
|
|
})
|
|
.def(
|
|
"convert_node",
|
|
[](caffe2::onnx::Caffe2Backend& instance,
|
|
const py::bytes& node_str,
|
|
const std::vector<py::bytes>& value_infos_bytes,
|
|
int opset_version) -> std::vector<std::vector<py::bytes>> {
|
|
// Note that we return two lists of serialized ops. The first set is
|
|
// init_ops and the second set is ops for pred net. When converting
|
|
// RNN related op, it is possible that we will create ops in the
|
|
// init_net. Hence the return structure here
|
|
caffe2::onnx::ValueInfoMap value_infos{};
|
|
for (const auto& vi_bytes : value_infos_bytes) {
|
|
::ONNX_NAMESPACE::ValueInfoProto vi{};
|
|
vi.ParseFromString(vi_bytes);
|
|
auto name = vi.name();
|
|
value_infos.emplace(std::move(name), std::move(vi));
|
|
}
|
|
auto c2ops = instance.ConvertNode(
|
|
node_str.cast<std::string>(), {value_infos, opset_version});
|
|
std::vector<std::vector<py::bytes>> vals;
|
|
vals.emplace_back();
|
|
auto& init_vals = vals.back();
|
|
for (const auto& init_op : c2ops.init_ops) {
|
|
std::string out;
|
|
init_op.SerializeToString(&out);
|
|
init_vals.emplace_back(py::bytes(out));
|
|
}
|
|
vals.emplace_back();
|
|
auto& normal_vals = vals.back();
|
|
for (const auto& op : c2ops.ops) {
|
|
std::string out;
|
|
op.SerializeToString(&out);
|
|
normal_vals.emplace_back(py::bytes(out));
|
|
}
|
|
return vals;
|
|
},
|
|
py::arg("node_str"),
|
|
py::arg("value_infos_bytes") = std::vector<py::bytes>{},
|
|
py::arg("opset_version") = kKnownOpsetVersion)
|
|
.def(
|
|
"_build_tensor_filling_op",
|
|
[](caffe2::onnx::Caffe2Backend& instance,
|
|
const py::bytes& tensor_proto_str,
|
|
const std::string& name = "") -> py::bytes {
|
|
caffe2::OperatorDef op;
|
|
::ONNX_NAMESPACE::TensorProto tp;
|
|
ParseProtoFromLargeString(tensor_proto_str, &tp);
|
|
instance.BuildTensorFillingOp(&op, tp, name);
|
|
std::string out;
|
|
op.SerializeToString(&out);
|
|
return py::bytes(out);
|
|
});
|
|
|
|
py::class_<Predictor>(m, "Predictor")
|
|
.def(py::init([](py::bytes init_net, py::bytes predict_net) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
NetDef init_net_, predict_net_;
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(
|
|
init_net.cast<std::string>(), &init_net_));
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(
|
|
predict_net.cast<std::string>(), &predict_net_));
|
|
return new Predictor(
|
|
makePredictorConfig(init_net_, predict_net_, workspace));
|
|
}))
|
|
.def(
|
|
"run",
|
|
[](Predictor& instance,
|
|
std::vector<py::object> inputs) -> std::vector<py::object> {
|
|
std::vector<Tensor> tensors_data;
|
|
#ifdef USE_NUMPY
|
|
// NOLINTNEXTLINE(modernize-loop-convert)
|
|
for (auto i = 0U; i < inputs.size(); ++i) {
|
|
auto input = inputs[i];
|
|
CAFFE_ENFORCE(
|
|
PyArray_Check(input.ptr()),
|
|
"Input must be of type numpy array.");
|
|
PyArrayObject* array =
|
|
reinterpret_cast<PyArrayObject*>(input.ptr());
|
|
tensors_data.push_back(
|
|
TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array));
|
|
}
|
|
#else
|
|
CAFFE_THROW("Caffe2 was compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
std::vector<TensorCPU> out;
|
|
instance(tensors_data, &out);
|
|
std::vector<py::object> pyout;
|
|
for (auto& t : out) {
|
|
// NOLINTNEXTLINE(performance-inefficient-vector-operation)
|
|
pyout.push_back(TensorFetcher().FetchTensor(t, true).obj);
|
|
}
|
|
return pyout;
|
|
})
|
|
.def(
|
|
"run",
|
|
[](Predictor& instance, std::map<std::string, py::object> inputs)
|
|
-> std::vector<py::object> {
|
|
Predictor::TensorMap tensors_data;
|
|
#ifdef USE_NUMPY
|
|
for (const auto& pair : inputs) {
|
|
const auto& name = pair.first;
|
|
const auto& input = pair.second;
|
|
CAFFE_ENFORCE(
|
|
PyArray_Check(input.ptr()),
|
|
"Input must be of type numpy array.");
|
|
PyArrayObject* array =
|
|
reinterpret_cast<PyArrayObject*>(input.ptr());
|
|
tensors_data.emplace(
|
|
name,
|
|
TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array));
|
|
}
|
|
#else
|
|
CAFFE_THROW("Caffe2 was compiled without NumPy support.");
|
|
#endif // USE_NUMPY
|
|
Predictor::TensorList out;
|
|
instance(tensors_data, &out);
|
|
std::vector<py::object> pyout;
|
|
for (auto& t : out) {
|
|
pyout.push_back(TensorFetcher().FetchTensor(t, true).obj);
|
|
}
|
|
return pyout;
|
|
});
|
|
}
|
|
|
|
void addGlobalMethods(py::module& m) {
|
|
m.attr("is_asan") = py::bool_(C10_ASAN_ENABLED);
|
|
m.attr("has_fbgemm") = py::bool_(
|
|
#ifdef USE_FBGEMM
|
|
true
|
|
#else
|
|
false
|
|
#endif
|
|
);
|
|
m.def("get_build_options", []() { return GetBuildOptions(); });
|
|
|
|
// The old mkl backend has been removed permanently, but we
|
|
// keep this Python attribute for BC
|
|
m.attr("has_mkldnn") = py::bool_(false);
|
|
|
|
m.attr("use_mkldnn") = py::bool_(
|
|
#ifdef USE_MKLDNN
|
|
true
|
|
#else // USE_MKLDNN
|
|
false
|
|
#endif // USE_MKLDNN
|
|
);
|
|
|
|
// if the binary is built with USE_ROCM, this is a ROCm build
|
|
// and therefore we need to ignore dyndep failures (because the module
|
|
// may not have a ROCm equivalent yet e.g. nccl)
|
|
m.attr("use_rocm") = py::bool_(
|
|
#if defined(USE_ROCM)
|
|
true
|
|
#else // USE_ROCM
|
|
false
|
|
#endif // USE_ROCM
|
|
);
|
|
|
|
m.attr("use_trt") = py::bool_(
|
|
#ifdef CAFFE2_USE_TRT
|
|
true
|
|
#else // CAFFE2_USE_TRT
|
|
false
|
|
#endif // CAFFE2_USE_TRT
|
|
);
|
|
|
|
m.attr("define_caffe2_no_operator_schema") = py::bool_(
|
|
#ifdef CAFFE2_NO_OPERATOR_SCHEMA
|
|
true
|
|
#else // CAFFE2_NO_OPERATOR_SCHEMA
|
|
false
|
|
#endif // CAFFE2_NO_OPERATOR_SCHEMA
|
|
);
|
|
|
|
m.def("set_per_op_engine_pref", [](const PerOpEnginePrefType& pref) -> void {
|
|
caffe2::SetPerOpEnginePref(pref);
|
|
});
|
|
|
|
m.def("set_global_engine_pref", [](const GlobalEnginePrefType& pref) -> void {
|
|
caffe2::SetGlobalEnginePref(pref);
|
|
});
|
|
m.def(
|
|
"set_engine_pref",
|
|
[](const PerOpEnginePrefType& per_op_pref,
|
|
const GlobalEnginePrefType& global_pref) -> void {
|
|
caffe2::SetEnginePref(per_op_pref, global_pref);
|
|
});
|
|
m.def(
|
|
"set_op_engine_pref",
|
|
[](const std::string& op_type,
|
|
const CaffeMap<DeviceType, EnginePrefType>& op_pref) -> void {
|
|
caffe2::SetOpEnginePref(op_type, op_pref);
|
|
});
|
|
|
|
m.def(
|
|
"op_registry_key",
|
|
[](const std::string& op_type,
|
|
const std::string& engine) -> const std::string {
|
|
return caffe2::OpRegistryKey(op_type, engine);
|
|
});
|
|
// Python binding "global_init": forwards a list of command-line style
// arguments to caffe2::GlobalInit and raises (via CAFFE_ENFORCE) when
// initialization reports failure.
m.def("global_init", [](std::vector<std::string> args) -> void {
  int argc = static_cast<int>(args.size());
  // Build a C-style argv whose entries point into `args`. `args` is a
  // by-value copy owned by this lambda, so the pointers stay valid for the
  // duration of the call.
  std::vector<char*> argv;
  argv.reserve(args.size() + 1);
  for (auto& arg : args) {
    // &arg[0] yields a mutable, NUL-terminated buffer (valid for empty
    // strings as well), so no const_cast is required.
    argv.push_back(&arg[0]);
  }
  // Keep the conventional argv[argc] == nullptr terminator so that any
  // consumer that walks the array up to the sentinel stays in bounds.
  argv.push_back(nullptr);
  char** pargv = argv.data();
  CAFFE_ENFORCE(caffe2::GlobalInit(&argc, &pargv));
});
|
|
|
|
// Python binding "registered_operators": returns the names reported by
// caffe2::GetRegisteredOperators() as a list of strings.
m.def("registered_operators", []() {
  const std::set<string> sorted_keys = caffe2::GetRegisteredOperators();

  // A std::set iterates in ascending (lexicographic) order, so a plain range
  // copy keeps the result sorted.
  return std::vector<std::string>(sorted_keys.begin(), sorted_keys.end());
});
|
|
m.def("on_module_exit", []() { caffe2::python::ClearWorkspaces(); });
|
|
// create_if_missing is not used, but is necessary for pybind to
|
|
// properly do function overloading.
|
|
m.def(
|
|
"switch_workspace", [](Workspace* ws, py::object /*create_if_missing*/) {
|
|
// TODO
|
|
caffe2::python::SetCurrentWorkspace(ws);
|
|
});
|
|
m.def(
|
|
"create_child_workspace",
|
|
[](const std::string& parent_ws_name, const std::string& child_ws_name) {
|
|
auto parent_gws = caffe2::python::GetWorkspaceByName(parent_ws_name);
|
|
CAFFE_ENFORCE(parent_gws, "Parent ws does not exist.");
|
|
std::unique_ptr<Workspace> child_ws(new Workspace(parent_gws));
|
|
caffe2::python::InsertWorkspace(child_ws_name, std::move(child_ws));
|
|
},
|
|
"Create and register child ws, sharing existing blobs in parent ws.",
|
|
py::arg("parent_ws_name"),
|
|
py::arg("child_ws_name"));
|
|
m.def(
|
|
"switch_workspace",
|
|
[](const std::string& name, const py::object create_if_missing) {
|
|
if (create_if_missing.is_none()) {
|
|
return caffe2::python::SwitchWorkspaceInternal(name, false);
|
|
}
|
|
return caffe2::python::SwitchWorkspaceInternal(
|
|
name, create_if_missing.cast<bool>());
|
|
},
|
|
"Switch to the specified workspace, creating if necessary",
|
|
py::arg("name"),
|
|
py::arg("create_if_missing") = py::none());
|
|
m.def(
|
|
"reset_workspace",
|
|
[](const py::object& root_folder) {
|
|
VLOG(1) << "Resetting workspace.";
|
|
if (root_folder.is_none()) {
|
|
caffe2::python::ResetWorkspace(new Workspace());
|
|
} else {
|
|
caffe2::python::ResetWorkspace(
|
|
new Workspace(root_folder.cast<std::string>()));
|
|
}
|
|
return true;
|
|
},
|
|
"Reset the workspace",
|
|
py::arg("root_folder") = py::none());
|
|
|
|
m.def("root_folder", []() {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
return workspace->RootFolder();
|
|
});
|
|
m.def("current_workspace", []() {
|
|
return caffe2::python::GetCurrentWorkspaceName();
|
|
});
|
|
m.def("workspaces", []() {
|
|
std::vector<std::string> names;
|
|
caffe2::python::GetWorkspaceNames(names);
|
|
return names;
|
|
});
|
|
m.def("nearby_opnames", [](const std::string& name) {
|
|
std::vector<std::string> alternatives;
|
|
unsigned editTolerance = 3;
|
|
// NOLINTNEXTLINE(performance-for-range-copy)
|
|
for (auto it : caffe2::CPUOperatorRegistry()->Keys()) {
|
|
if (editDistance(it, name, editTolerance) < editTolerance + 1) {
|
|
alternatives.push_back(it);
|
|
}
|
|
}
|
|
return alternatives;
|
|
});
|
|
m.def("local_blobs", []() {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
return workspace->LocalBlobs();
|
|
});
|
|
m.def("blobs", []() {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
return workspace->Blobs();
|
|
});
|
|
m.def("has_blob", [](const std::string& name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
return workspace->HasBlob(name);
|
|
});
|
|
// Python binding "fill_random_network_inputs": parses a serialized NetDef and
// asks caffe2::emulator::fillRandomNetworkInputs to populate the current
// workspace using the given per-input dims and types.
m.def(
    "fill_random_network_inputs",
    [](const py::bytes& net_def,
       const std::vector<std::vector<std::vector<int64_t>>>& inputDims,
       const std::vector<std::vector<std::string>>& inputTypes) {
      Workspace* workspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(workspace);
      // Copy the payload out of the Python bytes object while the GIL is
      // still held; Python objects must not be touched after the release
      // below.
      const std::string serialized_net = net_def.cast<std::string>();
      py::gil_scoped_release g;
      NetDef net;
      CAFFE_ENFORCE(ParseProtoFromLargeString(serialized_net, &net));
      caffe2::emulator::fillRandomNetworkInputs(
          net, inputDims, inputTypes, workspace);
    });
|
|
m.def(
|
|
"create_net",
|
|
[](py::bytes net_def, bool overwrite) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
caffe2::NetDef proto;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(net_def.cast<std::string>(), &proto),
|
|
"Can't parse net proto: ",
|
|
net_def.cast<std::string>());
|
|
CAFFE_ENFORCE(
|
|
workspace->CreateNet(proto, overwrite),
|
|
"Error creating net with proto: ",
|
|
net_def.cast<std::string>());
|
|
return true;
|
|
},
|
|
py::arg("net_def"),
|
|
py::arg("overwrite") = kPyBindFalse);
|
|
m.def("run_net", [](const std::string& name, int num_iter, bool allow_fail) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
CAFFE_ENFORCE(workspace->GetNet(name), "Can't find net ", name);
|
|
py::gil_scoped_release g;
|
|
for (int i = 0; i < num_iter; i++) {
|
|
bool success = workspace->RunNet(name);
|
|
if (!allow_fail) {
|
|
CAFFE_ENFORCE(success, "Error running net ", name);
|
|
} else {
|
|
if (!success) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
});
|
|
// Python binding "add_observer_to_net": attaches an observer of the requested
// type ("ProfileObserver", "TimeObserver" or "RunCountObserver") to the named
// net in the current workspace and returns a Python handle to it.
// Raises (via CAFFE_ENFORCE) when there is no current workspace, the net is
// not found, or the observer type matches none of the supported names.
m.def(
    "add_observer_to_net",
    [](const std::string& net_name, const std::string& observer_type) {
      Workspace* workspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(workspace);
      NetBase* net = workspace->GetNet(net_name);
      CAFFE_ENFORCE(net, "Can't find net ", net_name);

      const Observable<NetBase>::Observer* observer = nullptr;
      {
        // Only the C++ observer construction/attachment runs without the GIL.
        // The release is scoped so that the GIL is held again before the
        // py::cast below creates a Python object.
        py::gil_scoped_release g;

#define REGISTER_PYTHON_EXPOSED_OBSERVER(ob_type)             \
  {                                                           \
    if (observer_type.compare(#ob_type) == 0) {               \
      unique_ptr<ob_type> net_ob = make_unique<ob_type>(net); \
      observer = net->AttachObserver(std::move(net_ob));      \
    }                                                         \
  }

        REGISTER_PYTHON_EXPOSED_OBSERVER(ProfileObserver);
        REGISTER_PYTHON_EXPOSED_OBSERVER(TimeObserver);
#undef REGISTER_PYTHON_EXPOSED_OBSERVER

        // The Python-facing name differs from the C++ class name here, so
        // this case cannot go through the macro above.
        if (observer_type.compare("RunCountObserver") == 0) {
          unique_ptr<RunCountNetObserver> net_ob =
              make_unique<RunCountNetObserver>(net);
          observer = net->AttachObserver(std::move(net_ob));
        }
      }

      CAFFE_ENFORCE(
          observer != nullptr,
          "No observer attached for observer type: ",
          observer_type);
      return py::cast(observer);
    });
|
|
m.def(
|
|
"remove_observer_from_net",
|
|
[](const std::string& net_name, const ObserverBase<NetBase>* observer) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
CAFFE_ENFORCE(workspace->GetNet(net_name), "Can't find net ", net_name);
|
|
py::gil_scoped_release g;
|
|
|
|
NetBase* net = workspace->GetNet(net_name);
|
|
net->DetachObserver(observer);
|
|
});
|
|
m.def("clear_global_net_observer", []() {
|
|
py::gil_scoped_release g;
|
|
caffe2::ClearGlobalNetObservers();
|
|
});
|
|
m.def("num_observers_on_net", [](const std::string& net_name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
CAFFE_ENFORCE(workspace->GetNet(net_name), "Can't find net ", net_name);
|
|
py::gil_scoped_release g;
|
|
|
|
NetBase* net = workspace->GetNet(net_name);
|
|
return net->NumObservers();
|
|
});
|
|
m.def(
|
|
"benchmark_net",
|
|
[](const std::string& name,
|
|
size_t warmup_runs,
|
|
size_t main_runs,
|
|
bool run_individual) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* net = workspace->GetNet(name);
|
|
CAFFE_ENFORCE(net, "Didn't find net: ", name);
|
|
py::gil_scoped_release g;
|
|
vector<float> stat =
|
|
net->TEST_Benchmark(warmup_runs, main_runs, run_individual);
|
|
return stat;
|
|
});
|
|
m.def("benchmark_net_once", [](const std::string& name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* net = workspace->GetNet(name);
|
|
CAFFE_ENFORCE(net, "Didn't find net: ", name);
|
|
py::gil_scoped_release g;
|
|
float stat = net->TEST_Benchmark_One_Run();
|
|
return stat;
|
|
});
|
|
|
|
m.def("delete_net", [](const std::string& name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
workspace->DeleteNet(name);
|
|
return true;
|
|
});
|
|
m.def("nets", []() {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
return workspace->Nets();
|
|
});
|
|
m.def("run_operator_once", [](const py::bytes& op_def) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
OperatorDef def;
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(op_def.cast<std::string>(), &def));
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(workspace->RunOperatorOnce(def));
|
|
return true;
|
|
});
|
|
// Run an operator multiple times.
|
|
// This is needed for microbenchmarking as we want the benchmark loop to be in
|
|
// C++ to minimize overhead.
|
|
m.def("run_operator_multiple", [](const py::bytes& op_def, int num_runs) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
OperatorDef def;
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(op_def.cast<std::string>(), &def));
|
|
py::gil_scoped_release g;
|
|
std::unique_ptr<OperatorBase> op(CreateOperator(def, workspace));
|
|
for (int i = 0; i < num_runs; i++) {
|
|
if (!op->Run()) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
});
|
|
// Python binding "get_operator_cost": parses a serialized OperatorDef, looks
// up the shapes of the named input blobs in the current workspace, and
// returns (flops, bytes_written, bytes_read) from the operator schema's
// InferCost. Raises (via CAFFE_ENFORCE) when the workspace, the schema or any
// input blob is missing, or when the proto cannot be parsed.
m.def(
    "get_operator_cost",
    [](const py::bytes& op_def, const std::vector<string>& input_blobs) {
      Workspace* workspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(workspace);
      OperatorDef def;
      CAFFE_ENFORCE(
          ParseProtoFromLargeString(op_def.cast<std::string>(), &def),
          "Couldn't parse operator proto.");
      // `def` outlives this reference, so binding avoids a string copy.
      const auto& op_type = def.type();
      auto* schema = OpSchemaRegistry::Schema(op_type);
      CAFFE_ENFORCE(schema);
      vector<TensorShape> shapes;
      shapes.reserve(input_blobs.size());
      for (const auto& blob_name : input_blobs) {
        auto* blob = workspace->GetBlob(blob_name);
        // Fail with a readable error instead of handing a null blob to
        // GetTensorShapeOfBlob.
        CAFFE_ENFORCE(blob, "Can't find blob ", blob_name);
        shapes.emplace_back(GetTensorShapeOfBlob(blob));
      }
      const auto c = schema->InferCost(def, shapes);
      return std::make_tuple(c.flops, c.bytes_written, c.bytes_read);
    });
|
|
m.def("run_net_once", [](const py::bytes& net_def) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
NetDef def;
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(net_def.cast<std::string>(), &def));
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(workspace->RunNetOnce(def));
|
|
return true;
|
|
});
|
|
m.def("run_plan", [](const py::bytes& plan_def) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
PlanDef def;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(plan_def.cast<std::string>(), &def));
|
|
py::gil_scoped_release g;
|
|
CAFFE_ENFORCE(workspace->RunPlan(def));
|
|
return true;
|
|
});
|
|
m.def("run_plan_in_background", [](const py::bytes& plan_def) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
PlanDef def;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(plan_def.cast<std::string>(), &def));
|
|
py::gil_scoped_release g;
|
|
|
|
auto background_plan = std::make_shared<BackgroundPlan>(workspace, def);
|
|
background_plan->run();
|
|
return background_plan;
|
|
});
|
|
// Python binding "apply_transform": parses a serialized NetDef, runs
// ApplyTransform with the given key, and returns the transformed net as
// serialized bytes.
m.def(
    "apply_transform",
    [](const string& transform_key, const py::bytes& net_def) {
      NetDef def;
      CAFFE_ENFORCE(
          ParseProtoFromLargeString(net_def.cast<std::string>(), &def));

      std::string protob;
      {
        // Release the GIL only around the pure C++ work. The scope ends
        // before the py::bytes result is built, because creating Python
        // objects requires the GIL.
        py::gil_scoped_release g;
        auto transformed_net = ApplyTransform(transform_key, def);
        CAFFE_ENFORCE(transformed_net.SerializeToString(&protob));
      }
      return py::bytes(protob);
    });
|
|
// Python binding "apply_transform_if_faster": parses the serialized net and
// init net, forwards them with the benchmarking parameters to
// ApplyTransformIfFaster, and returns the resulting net as serialized bytes.
m.def(
    "apply_transform_if_faster",
    [](const string& transform_key,
       const py::bytes& net_def_bytes,
       const py::bytes& init_def_bytes,
       int warmup_runs,
       int main_runs,
       double improvement_threshold) {
      NetDef def;
      CAFFE_ENFORCE(
          ParseProtoFromLargeString(net_def_bytes.cast<std::string>(), &def));
      NetDef init_def;
      CAFFE_ENFORCE(ParseProtoFromLargeString(
          init_def_bytes.cast<std::string>(), &init_def));

      std::string protob;
      {
        // Release the GIL only around the C++ transform; it must be held
        // again before the py::bytes return value is created.
        py::gil_scoped_release g;
        auto transformed_net = ApplyTransformIfFaster(
            transform_key,
            def,
            init_def,
            warmup_runs,
            main_runs,
            improvement_threshold);
        CAFFE_ENFORCE(transformed_net.SerializeToString(&protob));
      }
      return py::bytes(protob);
    });
|
|
// Python binding "memonger_compute_blob_recycling_for_dag": parses a
// serialized NetDef, forwards it with the remaining arguments to
// caffe2::memonger::compute_blob_recycling_for_dag, and returns the optimized
// net as serialized bytes.
m.def(
    "memonger_compute_blob_recycling_for_dag",
    [](const py::bytes& net_def,
       const std::vector<string>& input_blobs,
       const std::vector<int>& op_indices,
       const std::unordered_set<string>& shareable_blob_names,
       const string& namescope,
       const std::unordered_set<string>& dont_share_blob_names,
       const std::unordered_map<string, vector<int>>& blob_shapes) {
      // Read the Python bytes object while the GIL is still held.
      const std::string serialized_net = net_def.cast<std::string>();

      std::string protob;
      {
        // Only C++ data is touched inside this scope, so the GIL can be
        // released; it is re-acquired before py::bytes is constructed.
        py::gil_scoped_release g;
        NetDef net;
        CAFFE_ENFORCE(ParseProtoFromLargeString(serialized_net, &net));
        NetDef optimized_proto =
            caffe2::memonger::compute_blob_recycling_for_dag(
                net,
                input_blobs,
                op_indices,
                shareable_blob_names,
                namescope,
                dont_share_blob_names,
                blob_shapes);
        CAFFE_ENFORCE(optimized_proto.SerializeToString(&protob));
      }
      return py::bytes(protob);
    });
|
|
// Python binding "memonger_optimize_inference_net": parses a serialized
// NetDef, runs caffe2::memonger::optimize_inference_net with the given static
// blob names, and returns the optimized net as serialized bytes.
m.def(
    "memonger_optimize_inference_net",
    [](const py::bytes& net_def,
       const std::vector<std::string>& static_blobs) {
      NetDef def;
      CAFFE_ENFORCE(
          ParseProtoFromLargeString(net_def.cast<std::string>(), &def));

      std::string protob;
      {
        // The GIL is released for the C++ optimization only and is held
        // again by the time the py::bytes result is created.
        py::gil_scoped_release g;
        std::set<string> static_blobs_set(
            static_blobs.begin(), static_blobs.end());
        NetDef optimized =
            caffe2::memonger::optimize_inference_net(def, static_blobs_set);
        CAFFE_ENFORCE(optimized.SerializeToString(&protob));
      }
      return py::bytes(protob);
    });
|
|
m.def(
|
|
"infer_shapes_and_types_from_workspace",
|
|
[](const std::vector<py::bytes>& net_protos) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
|
|
// Parse protobuffers to NetDefs
|
|
std::vector<std::unique_ptr<caffe2::NetDef>> nets;
|
|
std::vector<caffe2::NetDef*> nets_ptr;
|
|
// NOLINTNEXTLINE(performance-for-range-copy)
|
|
for (auto proto : net_protos) {
|
|
std::unique_ptr<NetDef> def(new NetDef());
|
|
CAFFE_ENFORCE(def->ParseFromString(proto));
|
|
nets_ptr.push_back(def.get());
|
|
nets.push_back(std::move(def));
|
|
}
|
|
|
|
auto blob_info =
|
|
InferBlobShapesAndTypesFromWorkspace(workspace, nets_ptr);
|
|
|
|
std::string protob;
|
|
CAFFE_ENFORCE(blob_info.SerializeToString(&protob));
|
|
return py::bytes(protob);
|
|
});
|
|
m.def(
|
|
"infer_shapes_and_types_from_map",
|
|
[](const std::vector<py::bytes>& net_protos,
|
|
const std::map<std::string, std::vector<int64_t>> blob_dimensions) {
|
|
// Parse protobuffers to NetDefs
|
|
std::vector<std::unique_ptr<caffe2::NetDef>> nets;
|
|
std::vector<caffe2::NetDef*> nets_ptr;
|
|
// NOLINTNEXTLINE(performance-for-range-copy)
|
|
for (auto proto : net_protos) {
|
|
std::unique_ptr<NetDef> def(new NetDef());
|
|
CAFFE_ENFORCE(def->ParseFromString(proto));
|
|
nets_ptr.push_back(def.get());
|
|
nets.push_back(std::move(def));
|
|
}
|
|
|
|
auto blob_info =
|
|
InferBlobShapesAndTypesFromMap(blob_dimensions, nets_ptr);
|
|
|
|
std::string protob;
|
|
CAFFE_ENFORCE(blob_info.SerializeToString(&protob));
|
|
return py::bytes(protob);
|
|
});
|
|
m.def(
|
|
"infer_shapes_and_types_from_map",
|
|
[](const std::vector<py::bytes>& net_protos,
|
|
const std::map<std::string, std::vector<int64_t>> blob_dimensions,
|
|
const std::map<std::string, int> int_blob_types) {
|
|
// Parse protobuffers to NetDefs
|
|
std::vector<std::unique_ptr<caffe2::NetDef>> nets;
|
|
std::vector<caffe2::NetDef*> nets_ptr;
|
|
// NOLINTNEXTLINE(performance-for-range-copy)
|
|
for (auto proto : net_protos) {
|
|
std::unique_ptr<NetDef> def(new NetDef());
|
|
CAFFE_ENFORCE(def->ParseFromString(proto));
|
|
nets_ptr.push_back(def.get());
|
|
nets.push_back(std::move(def));
|
|
}
|
|
std::map<std::string, TensorProto_DataType> blob_types;
|
|
// NOLINTNEXTLINE(performance-for-range-copy)
|
|
for (auto blob_type : int_blob_types) {
|
|
blob_types[blob_type.first] =
|
|
static_cast<TensorProto_DataType>(blob_type.second);
|
|
}
|
|
|
|
auto blob_info = InferBlobShapesAndTypesFromMap(
|
|
blob_dimensions, blob_types, nets_ptr);
|
|
|
|
std::string protob;
|
|
CAFFE_ENFORCE(blob_info.SerializeToString(&protob));
|
|
return py::bytes(protob);
|
|
});
|
|
m.def("ssa_rewrite", [](const py::bytes& net_proto) {
|
|
auto net_def = std::make_unique<NetDef>();
|
|
CAFFE_ENFORCE(net_def->ParseFromString(net_proto));
|
|
onnx::SsaRewrite(nullptr, net_def.get());
|
|
std::string output_net_proto;
|
|
CAFFE_ENFORCE(net_def->SerializeToString(&output_net_proto));
|
|
return py::bytes(output_net_proto);
|
|
});
|
|
m.def("create_blob", [](const std::string& name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
CAFFE_ENFORCE(workspace->CreateBlob(name));
|
|
return true;
|
|
});
|
|
m.def("reset_blob", [](const std::string& name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* b = workspace->GetBlob(name);
|
|
CAFFE_ENFORCE(b);
|
|
b->Reset();
|
|
});
|
|
// Python binding "fetch_blob": returns the named blob from the current
// workspace as a Python object via python_detail::fetchBlob.
m.def("fetch_blob", [](const std::string& name) -> py::object {
  Workspace* workspace = caffe2::python::GetCurrentWorkspace();
  // Same guard as the other workspace-based bindings: fail with an enforce
  // error rather than passing a null workspace along.
  CAFFE_ENFORCE(workspace);
  return python_detail::fetchBlob(workspace, name);
});
|
|
m.def(
|
|
"feed_blob",
|
|
[](const std::string& name, py::object arg, py::object device_option) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
auto* blob = workspace->CreateBlob(name);
|
|
return python_detail::feedBlob(blob, arg, device_option);
|
|
},
|
|
"",
|
|
py::arg("name"),
|
|
py::arg("arg"),
|
|
py::arg("device_option") = py::none());
|
|
m.def("deserialize_blob", [](const string& content) {
|
|
return python_detail::deserializeBlob(content);
|
|
});
|
|
m.def("serialize_blob", [](const std::string& name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* blob = workspace->GetBlob(name);
|
|
CAFFE_ENFORCE(blob);
|
|
return py::bytes(SerializeBlob(*blob, name));
|
|
});
|
|
m.def(
|
|
"deserialize_blob",
|
|
[](const std::string& name, const py::bytes& serialized) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* blob = workspace->CreateBlob(name);
|
|
DeserializeBlob(serialized.cast<std::string>(), blob);
|
|
});
|
|
|
|
// we support 2 possible signatures of python op: (inputs, outputs) or
|
|
// (inputs, outputs, workspace)
|
|
m.def(
|
|
"register_python_op",
|
|
[](py::object func, bool pass_workspace, std::string name) {
|
|
using namespace python_detail;
|
|
CAFFE_ENFORCE(!func.is_none());
|
|
if (!name.empty()) {
|
|
name += ":";
|
|
}
|
|
name += func.attr("__name__").cast<std::string>();
|
|
std::string token = name;
|
|
for (int i = 1; gRegistry().count(token) > 0; ++i) {
|
|
token = name + ":" + to_string(i);
|
|
}
|
|
gRegistry()[token] = Func{func, pass_workspace};
|
|
return token;
|
|
});
|
|
m.def(
|
|
"register_python_gradient_op",
|
|
[](const std::string& token, py::object func) {
|
|
using namespace python_detail;
|
|
CAFFE_ENFORCE(!func.is_none());
|
|
CAFFE_ENFORCE(gRegistry().find(token) != gRegistry().end());
|
|
// For global sanity gradient ops shouldn't access workspace
|
|
gRegistry()[token + "_gradient"] = Func{func, false};
|
|
});
|
|
m.def("infer_op_input_output_device", [](const py::bytes& op) {
|
|
std::unique_ptr<caffe2::OperatorDef> def(new caffe2::OperatorDef());
|
|
CAFFE_ENFORCE(def.get()->ParseFromString(op));
|
|
// device_info is a pair of vector of DeviceOption.
|
|
// `first` is for inputs, `second` is for outputs.
|
|
auto device_info = InferOpInputOutputDevice(*def);
|
|
|
|
std::vector<py::bytes> in_res;
|
|
std::vector<py::bytes> out_res;
|
|
for (auto& in_dev : device_info.first) {
|
|
std::string protob;
|
|
CAFFE_ENFORCE(in_dev.SerializeToString(&protob));
|
|
// NOLINTNEXTLINE(modernize-use-emplace)
|
|
in_res.push_back(py::bytes(protob));
|
|
}
|
|
for (auto& out_dev : device_info.second) {
|
|
std::string protob;
|
|
CAFFE_ENFORCE(out_dev.SerializeToString(&protob));
|
|
// NOLINTNEXTLINE(modernize-use-emplace)
|
|
out_res.push_back(py::bytes(protob));
|
|
}
|
|
return std::make_pair(in_res, out_res);
|
|
});
|
|
// Python binding "get_stats": publishes the StatRegistry into a list and
// returns it as a {key: value} dictionary.
m.def("get_stats", []() {
  ExportedStatList stats;
  StatRegistry::get().publish(stats);
  // Use a 64-bit value type so large counters are not truncated when copied
  // into the map. NOTE(review): assumes ExportedStatValue::value is int64_t -
  // confirm against caffe2/core/stats.h. Python still receives plain ints.
  std::unordered_map<std::string, int64_t> stats_map;
  for (const auto& stat : stats) {
    stats_map[stat.key] = stat.value;
  }
  return stats_map;
});
|
|
m.def("is_numa_enabled", []() { return IsNUMAEnabled(); });
|
|
m.def("get_num_numa_nodes", []() { return GetNumNUMANodes(); });
|
|
m.def("get_blob_numa_node", [](const std::string& blob_name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* blob = workspace->GetBlob(blob_name);
|
|
CAFFE_ENFORCE(blob);
|
|
const TensorCPU& tensor = blob->Get<TensorCPU>();
|
|
const void* raw_data = tensor.raw_data();
|
|
CAFFE_ENFORCE(raw_data);
|
|
return GetNUMANode(raw_data);
|
|
});
|
|
m.def("get_blob_size_bytes", [](const std::string& blob_name) {
|
|
Workspace* workspace = caffe2::python::GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(workspace);
|
|
auto* blob = workspace->GetBlob(blob_name);
|
|
CAFFE_ENFORCE(blob);
|
|
return BlobStat::sizeBytes(*blob);
|
|
});
|
|
m.def("support_onnx_export", [](const std::string& op) -> bool {
|
|
const OpSchema* schema = caffe2::OpSchemaRegistry::Schema(op);
|
|
if (!schema) {
|
|
return false;
|
|
}
|
|
return !schema->onnx_schema().empty();
|
|
});
|
|
m.def(
|
|
"export_to_onnx",
|
|
[](caffe2::onnx::DummyName* dummy,
|
|
const py::bytes& c2op,
|
|
const std::unordered_map<std::string, std::vector<int>>& shapes)
|
|
-> std::pair<std::vector<py::bytes>, std::vector<py::bytes>> {
|
|
OperatorDef op;
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(c2op.cast<std::string>(), &op));
|
|
const auto& type = op.type();
|
|
const OpSchema* schema = caffe2::OpSchemaRegistry::Schema(type);
|
|
CAFFE_ENFORCE(schema);
|
|
std::unordered_map<std::string, TensorShape> tensor_shapes;
|
|
for (const auto& it : shapes) {
|
|
tensor_shapes.emplace(
|
|
it.first, CreateTensorShape(it.second, TensorProto::FLOAT));
|
|
}
|
|
auto results =
|
|
onnx::OnnxExporter(dummy).Caffe2OpToOnnxNodes(op, tensor_shapes);
|
|
std::pair<std::vector<py::bytes>, std::vector<py::bytes>> ret;
|
|
auto& nodes_str = ret.first;
|
|
auto& tensors_str = ret.second;
|
|
for (const auto& node : results.first) {
|
|
std::string out;
|
|
node.SerializeToString(&out);
|
|
nodes_str.emplace_back(py::bytes(out));
|
|
}
|
|
for (const auto& tensor : results.second) {
|
|
std::string out;
|
|
tensor.SerializeToString(&out);
|
|
tensors_str.emplace_back(py::bytes(out));
|
|
}
|
|
return ret;
|
|
});
|
|
|
|
#define CAFFE2_CPU_FEATURE_SUPPORT(feature) \
|
|
m.def("builtin_cpu_supports_" #feature, []() { return GetCpuId().feature(); })
|
|
|
|
CAFFE2_CPU_FEATURE_SUPPORT(avx2);
|
|
|
|
#undef CAFFE2_CPU_FEATURE_SUPPORT
|
|
m.def("transform_exists", [](const std::string& transform_name) {
|
|
return OptimizationPassRegistry()->Has(transform_name);
|
|
});
|
|
m.def("workspace_transform_exists", [](const std::string& transform_name) {
|
|
return WorkspaceOptimizationPassRegistry()->Has(transform_name);
|
|
});
|
|
m.def("run_transform", [](const std::string& transform_name, py::bytes def) {
|
|
caffe2::NetDef proto;
|
|
CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));
|
|
auto nn = caffe2::convertToNNModule(proto);
|
|
auto pass = OptimizationPassRegistry()->Create(transform_name, &nn);
|
|
|
|
CAFFE_ENFORCE(pass, "Pass doesn't exist: ", transform_name);
|
|
pass->run();
|
|
|
|
auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);
|
|
std::string out;
|
|
new_proto.SerializeToString(&out);
|
|
return py::bytes(out);
|
|
});
|
|
// Python binding "create_offline_tensor": creates the named blob in the
// current workspace as an OfflineTensor and sets its shape (dims), device
// (CPU) and element type (converted from the integer TensorProto data type).
// Returns true on success; failures surface through CAFFE_ENFORCE.
m.def(
    "create_offline_tensor",
    [](const std::string& name,
       const std::vector<int>& dims,
       int datatype) -> bool {
      Workspace* curr_ws = GetCurrentWorkspace();
      // Guard against dereferencing a null workspace in CreateBlob below.
      CAFFE_ENFORCE(curr_ws);
      auto* b = curr_ws->CreateBlob(name);
      auto* offline = b->GetMutable<OfflineTensor>();
      CAFFE_ENFORCE(offline);
      offline->setShapeAndType(
          dims,
          CPU,
          DataTypeToTypeMeta(static_cast<TensorProto::DataType>(datatype)));
      return true;
    });
|
|
m.def(
|
|
"onnxifi_set_option",
|
|
[](const std::string& optionName,
|
|
const std::string& optionValue) -> bool {
|
|
OnnxifiOptionHelper ts;
|
|
return ts.setOnnxifiOption(optionName, optionValue);
|
|
});
|
|
m.def("onnxifi_get_option", [](const std::string& optionName) -> std::string {
|
|
OnnxifiOptionHelper ts;
|
|
return ts.getOnnxifiOption(optionName);
|
|
});
|
|
m.def(
|
|
"onnxifi",
|
|
[](const py::bytes& pred_net_str,
|
|
const py::bytes& shapes_str,
|
|
const std::vector<int>& block_list,
|
|
const std::vector<std::string>& weight_names,
|
|
int max_batch_size,
|
|
int max_seq_size,
|
|
int timeout,
|
|
bool adjust_batch,
|
|
bool debug_builder,
|
|
bool merge_fp32_inputs_into_fp16,
|
|
bool net_ssa_rewritten,
|
|
bool use_onnx) -> py::bytes {
|
|
caffe2::NetDef pred_net;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(
|
|
pred_net_str.cast<std::string>(), &pred_net),
|
|
"broken pred_net protobuf");
|
|
Workspace* curr_ws = GetCurrentWorkspace();
|
|
CAFFE_ENFORCE(curr_ws);
|
|
splitSparseLengthsSumSparse(&pred_net, *curr_ws);
|
|
caffe2::TensorBoundShapes tbs;
|
|
CAFFE_ENFORCE(
|
|
ParseProtoFromLargeString(shapes_str.cast<std::string>(), &tbs),
|
|
"broken TensorBoundShapes protobuf");
|
|
ShapeInfoMap shape_map = caffe2::extractShapeInfoFromTensorBoundShapes(
|
|
tbs, max_batch_size, max_seq_size);
|
|
OnnxifiTransformerOptions opts;
|
|
opts.bound_shape_spec.max_batch_size = max_batch_size;
|
|
opts.bound_shape_spec.max_seq_size = max_seq_size;
|
|
opts.timeout = timeout;
|
|
opts.adjust_batch = adjust_batch;
|
|
opts.debug = debug_builder;
|
|
opts.merge_fp32_inputs_into_fp16 = merge_fp32_inputs_into_fp16;
|
|
opts.predictor_net_ssa_rewritten = net_ssa_rewritten;
|
|
opts.use_onnx = use_onnx;
|
|
OnnxifiTransformer ts(opts);
|
|
std::unordered_set<int> blocklist_set(
|
|
block_list.begin(), block_list.end());
|
|
std::vector<std::string> weight_names_overwrite{};
|
|
if (weight_names.size() == 0) {
|
|
weight_names_overwrite = curr_ws->Blobs();
|
|
} else {
|
|
weight_names_overwrite = weight_names;
|
|
}
|
|
ts.transform(
|
|
curr_ws,
|
|
&pred_net,
|
|
weight_names_overwrite,
|
|
shape_map,
|
|
blocklist_set);
|
|
std::string pred_net_str2;
|
|
pred_net.SerializeToString(&pred_net_str2);
|
|
return py::bytes(pred_net_str2);
|
|
});
|
|
m.def(
    "run_workspace_transform",
    [](const std::string& transform_name, py::bytes def) {
      // Runs the workspace optimization pass registered under
      // `transform_name` on the serialized NetDef `def`, using the current
      // workspace, and returns the resulting NetDef serialized as bytes.
      Workspace* workspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(workspace);

      // Deserialize the net handed over from Python.
      caffe2::NetDef proto;
      CAFFE_ENFORCE(
          ParseProtoFromLargeString(def.cast<std::string>(), &proto));

      // The pass operates on the NNModule form of the net.
      auto nn_module = caffe2::convertToNNModule(proto);
      auto pass = WorkspaceOptimizationPassRegistry()->Create(
          transform_name, &nn_module, workspace);

      // A falsy result from the registry is reported as an unknown pass.
      CAFFE_ENFORCE(pass, "Pass doesn't exist: ", transform_name);
      pass->run();

      // Convert back, handing the original proto to the converter as well.
      const caffe2::NetDef transformed =
          caffe2::convertToCaffe2Proto(nn_module, proto);
      std::string serialized;
      transformed.SerializeToString(&serialized);
      return py::bytes(serialized);
    });
|
|
m.def("fakeFp16FuseOps", [](const py::bytes& net_str) {
  // Parses `net_str` as a NetDef, applies opt::fakeFp16FuseOps to it in
  // place, and returns the modified net serialized as bytes.
  caffe2::NetDef netDef;
  CAFFE_ENFORCE(
      ParseProtoFromLargeString(net_str.cast<std::string>(), &netDef),
      "broken pred_net protobuf");

  opt::fakeFp16FuseOps(&netDef);

  std::string serialized;
  netDef.SerializeToString(&serialized);
  return py::bytes(serialized);
});
|
|
|
|
// Transformations are exposed as functions here and wrapped
|
|
// into a python interface in transformations.py
|
|
// Prefix the transformation with transform_ to avoid clobbering the
|
|
// function namespace.
|
|
m.def("transform_optimizeForMKLDNN", [](py::bytes def, bool training_mode) {
  // Parses `def` as a NetDef, runs opt::OptimizeForMkldnn on its NNModule
  // form against the current workspace, and returns the resulting NetDef
  // serialized as bytes. `training_mode` is forwarded to the optimizer.
  caffe2::NetDef proto;
  CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));

  auto nn = caffe2::convertToNNModule(proto);

  // Fail with a clear error instead of handing a null workspace to the
  // optimizer; this matches the check done by the other workspace-based
  // transforms in this file.
  Workspace* workspace = caffe2::python::GetCurrentWorkspace();
  CAFFE_ENFORCE(workspace);

  opt::OptimizeForMkldnn(&nn, workspace, training_mode);
  auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);

  std::string out;
  new_proto.SerializeToString(&out);
  return py::bytes(out);
});
|
|
|
|
m.def("transform_addNNPACK", [](py::bytes def) {
  // Deserialize the NetDef handed over from Python.
  caffe2::NetDef proto;
  CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));

  // Apply opt::addNNPACK to the NNModule form of the net, then convert back,
  // handing the original proto to the converter as well.
  auto nn_module = caffe2::convertToNNModule(proto);
  opt::addNNPACK(&nn_module);
  const caffe2::NetDef rewritten =
      caffe2::convertToCaffe2Proto(nn_module, proto);

  // Return the rewritten net to Python as serialized bytes.
  std::string serialized;
  rewritten.SerializeToString(&serialized);
  return py::bytes(serialized);
});
|
|
|
|
m.def("transform_fuseConvBN", [](py::bytes def) {
  // Parses `def` as a NetDef, runs opt::fuseConvBN on its NNModule form
  // against the current workspace, and returns the result as bytes.
  Workspace* workspace = caffe2::python::GetCurrentWorkspace();
  CAFFE_ENFORCE(workspace);

  caffe2::NetDef proto;
  CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));

  auto nn_module = caffe2::convertToNNModule(proto);
  opt::fuseConvBN(&nn_module, workspace);

  // NOTE(review): unlike the sibling transforms, the original proto is not
  // passed to convertToCaffe2Proto here — confirm this is intentional.
  const caffe2::NetDef fused = caffe2::convertToCaffe2Proto(nn_module);

  std::string serialized;
  fused.SerializeToString(&serialized);
  return py::bytes(serialized);
});
|
|
|
|
m.def("transform_fuseNNPACKConvRelu", [](py::bytes def) {
  // Deserialize the NetDef handed over from Python.
  caffe2::NetDef proto;
  CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));

  // Apply opt::fuseNNPACKConvRelu to the NNModule form of the net, then
  // convert back, handing the original proto to the converter as well.
  auto nn_module = caffe2::convertToNNModule(proto);
  opt::fuseNNPACKConvRelu(&nn_module);
  const caffe2::NetDef fused = caffe2::convertToCaffe2Proto(nn_module, proto);

  // Return the rewritten net to Python as serialized bytes.
  std::string serialized;
  fused.SerializeToString(&serialized);
  return py::bytes(serialized);
});
|
|
|
|
auto initialize = [&]() {
|
|
// Initialization of the module
|
|
#ifdef USE_NUMPY
|
|
([]() -> void {
|
|
// import_array1() forces a void return value.
|
|
import_array1();
|
|
})();
|
|
#endif // USE_NUMPY
|
|
// Single threaded, so safe
|
|
static bool initialized = false;
|
|
if (initialized) {
|
|
return;
|
|
}
|
|
// We will create a default workspace for us to run stuff.
|
|
caffe2::python::SwitchWorkspaceInternal("default", true);
|
|
initialized = true;
|
|
};
|
|
|
|
initialize();
|
|
};
|
|
|
|
PYBIND11_MODULE(caffe2_pybind11_state, m) {
  // Module entry point: sets the docstring, logs the import, registers the
  // global and object bindings, then instantiates every registered addition.
  m.doc() = "pybind11 stateful interface to Caffe2 workspaces";

  C10_LOG_API_USAGE_ONCE("caffe2.python.import");

  addGlobalMethods(m);
  addObjectMethods(m);

  // Each key in the addition registry is created with the module as its
  // argument; the object returned by Create() is not kept.
  auto* const addition_registry = PybindAdditionRegistry();
  for (const auto& key : addition_registry->Keys()) {
    addition_registry->Create(key, m);
  }
}
|
|
|
|
} // namespace python
|
|
} // namespace caffe2
|