Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/54120

Construct InterpreterManager inside PyTorchDeployModel
- add ReadAdapterInterface to deploy::Package

Implement PyTorchDeployModel::makePrediction for FeatureStore Examples
- Basic test of loading and executing 'simple' model

Test Plan: ran unit tests locally and CI

Differential Revision: D26961744

fbshipit-source-id: fce72bc83b9005500d9b7ce3fab2ed466f73d6ed
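The test plan above loads and executes a 'simple' model through the API this file implements. A minimal sketch of that flow, assuming details not shown in this file: the interpreter-pool size, the package path, and the load_pickle/call helpers come from the wider deploy API and are assumptions here, not confirmed by this source.

#include <torch/csrc/deploy/deploy.h>

int main() {
  torch::deploy::InterpreterManager manager(2); // pool size: an assumption
  torch::deploy::Package package =
      manager.load_package("/tmp/simple_model.pt"); // placeholder path
  // load_pickle and the call syntax below are assumed from the wider deploy
  // API; they are not defined in this file.
  torch::deploy::ReplicatedObj model =
      package.load_pickle("model", "model.pkl");
  auto session = model.acquire_session(); // picks an interpreter for us
  std::vector<at::IValue> args{torch::ones({2, 2})};
  auto output = session.self(args);
  return 0;
}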
153 lines
4.6 KiB
C++
#include <torch/csrc/deploy/deploy.h>

#include <dlfcn.h>
#include <libgen.h>
#include <unistd.h>

// These symbols are generated by cmake, using `ld -r -b binary` on
// libtorch_deployinterpreter.so, which takes the contents of the .so and
// embeds it into a symbol that is then linked into libtorch_deploy.so. This
// enables us to simply copy the contents of this symbol to disk and dlopen it
// to create an instance of Python.
extern "C" char _binary_libtorch_deployinterpreter_so_start[];
extern "C" char _binary_libtorch_deployinterpreter_so_end[];
namespace torch {
namespace deploy {

Package InterpreterManager::load_package(const std::string& uri) {
  return Package(uri, this);
}

Package InterpreterManager::load_package(
    std::shared_ptr<caffe2::serialize::ReadAdapterInterface> reader) {
  return Package(reader, this);
}
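// Recreate a ReplicatedObj inside this session's interpreter, either by
// unpickling its payload or by reusing the interpreter's cached copy of it.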
Obj InterpreterSession::from_movable(const ReplicatedObj& obj) {
  return impl_->unpickle_or_get(obj.pImpl_->object_id_, obj.pImpl_->data_);
}
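// Bind the replicated object to a live interpreter: use the requested one if
// given, otherwise ask the manager for one (the LoadBalancer below implements
// that choice).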
InterpreterSession ReplicatedObj::acquire_session(
    const Interpreter* on_this_interpreter) const {
  InterpreterSession I = on_this_interpreter
      ? on_this_interpreter->acquire_session()
      : pImpl_->manager_->acquire_one();
  I.self = I.from_movable(*this);
  return I;
}
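// A session acquired through the manager holds a slot in the manager's
// LoadBalancer; release it when the session ends.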
InterpreterSession::~InterpreterSession() {
  if (manager_ && notify_idx_ >= 0) {
    manager_->resources_.free(notify_idx_);
  }
}
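// Unload the pickled object from a single interpreter, or from every
// interpreter the manager owns when no interpreter is specified.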
void ReplicatedObjImpl::unload(const Interpreter* on_this_interpreter) {
  if (!on_this_interpreter) {
    for (auto& interp : manager_->all_instances()) {
      unload(&interp);
    }
    return;
  }

  InterpreterSession I = on_this_interpreter->acquire_session();
  I.impl_->unload(object_id_);
}
ReplicatedObjImpl::~ReplicatedObjImpl() {
  unload(nullptr);
}
void ReplicatedObj::unload(const Interpreter* on_this_interpreter) {
  pImpl_->unload(on_this_interpreter);
}
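// Pickle an object owned by this session so it can be moved across
// interpreters; the manager assigns it a process-wide object id, which lets
// each interpreter cache its own unpickled copy (see unpickle_or_get above).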
ReplicatedObj InterpreterSession::create_movable(Obj obj) {
  TORCH_CHECK(
      manager_,
      "Can only create a movable object when the session was created from an interpreter that is part of an InterpreterManager");
  auto pickled = impl_->pickle(self, obj);
  return ReplicatedObj(std::make_shared<ReplicatedObjImpl>(
      manager_->next_object_id_++, std::move(pickled), manager_));
}
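// Each Interpreter writes the embedded payload to its own temporary file
// before dlopen'ing it: dlopen of an already-loaded path returns the existing
// handle, so the fresh copy per instance is what gives every interpreter an
// isolated copy of the Python runtime.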
Interpreter::Interpreter(InterpreterManager* manager)
    : handle_(nullptr), manager_(manager) {
  char library_name[] = "/tmp/torch_deployXXXXXX";
  int fd = mkstemp(library_name);
  TORCH_INTERNAL_ASSERT(fd != -1, "failed to create temporary file");
  library_name_ = library_name;
  FILE* dst = fdopen(fd, "wb");
  TORCH_INTERNAL_ASSERT(dst);
  size_t size = _binary_libtorch_deployinterpreter_so_end -
      _binary_libtorch_deployinterpreter_so_start;
  TORCH_INTERNAL_ASSERT(
      size ==
      fwrite(_binary_libtorch_deployinterpreter_so_start, 1, size, dst));
  fclose(dst);
  handle_ = dlopen(library_name, RTLD_LOCAL | RTLD_LAZY);
  if (!handle_) {
    throw std::runtime_error(dlerror());
  }

  // note: if you want better debugging symbols for things inside
  // new_interpreter_impl, comment out this line so that the .so lasts long
  // enough for the debugger to see it.
  unlink(library_name_.c_str());

  void* new_interpreter_impl = dlsym(handle_, "new_interpreter_impl");
  assert(new_interpreter_impl);
  pImpl_ = std::unique_ptr<InterpreterImpl>(
      ((InterpreterImpl * (*)(void))new_interpreter_impl)());
}
Interpreter::~Interpreter() {
  if (handle_) {
    // ensure python uninitialization runs before we dlclose the library
    pImpl_.reset();
    dlclose(handle_);
  }
}
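// Round-robin scan starting where this thread last left off. uses_ is strided
// by 8 uint64_t (one 64-byte cache line) per interpreter, presumably so the
// per-interpreter counters do not share a cache line (avoiding false sharing).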
int LoadBalancer::acquire() {
  thread_local int last = 0;
  size_t minusers = SIZE_MAX;
  int min_idx = 0;
  for (size_t i = 0; i < n_; ++i, ++last) {
    if (last >= n_) {
      last = 0;
    }
    uint64_t prev = 0;
    bool acquired = __atomic_compare_exchange_n(
        &uses_[8 * last],
        &prev,
        1ULL,
        false,
        __ATOMIC_SEQ_CST,
        __ATOMIC_SEQ_CST);
    if (acquired) {
      // fast path, we found an interpreter with no users
      return last;
    }
    // slow path, we don't want to use this interpreter because it is being
    // used by someone else.

    if (prev < minusers) {
      minusers = prev;
      min_idx = last;
    }
  }
  // we failed to find a completely free interpreter. heuristically use the
  // one with the least number of users (note that the counts may have changed
  // since the scan, so this is only a heuristic).
  __atomic_fetch_add(&uses_[8 * min_idx], 1ULL, __ATOMIC_SEQ_CST);
  return min_idx;
}
void LoadBalancer::free(int where) {
  __atomic_fetch_sub(&uses_[8 * where], 1ULL, __ATOMIC_SEQ_CST);
}
} // namespace deploy
} // namespace torch