[nativert] Move Placement to pytorch core (#152953)
Summary: Move Placement to pytorch core. `torch::nativert::isSameDevice` is used explicitly in the code to avoid confusion with the `isSameDevice` in the torch namespace.

Test Plan:
```
buck run fbcode//mode/dev-nosan //caffe2/test/cpp/nativert:placement_test
./bin/test_nativert
```
OSS and internal CI

Differential Revision: D74190745
Pull Request resolved: https://github.com/pytorch/pytorch/pull/152953
Approved by: https://github.com/Skylion007, https://github.com/swolchok, https://github.com/zhxchen17, https://github.com/cyyever
Committed by: PyTorch MergeBot
Parent: ced90d23d3
Commit: 2e440e39a6
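For context, a minimal usage sketch of the API this commit moves (not part of the diff): it assumes the headers added below are on the include path, and the `main` wrapper and device choices are purely illustrative. The fully qualified `torch::nativert::isSameDevice` call mirrors the note in the summary.

```cpp
// Hypothetical example, not part of this commit.
#include <torch/nativert/executor/Placement.h>

#include <c10/core/Device.h>

#include <iostream>

int main() {
  // Remap anything the model artifact recorded on cuda:0 to cuda:1 at runtime;
  // devices without an explicit mapping fall back to the default (cpu here).
  torch::nativert::Placement placement(
      {{c10::Device("cuda:0"), c10::Device("cuda:1")}}, c10::Device("cpu"));

  c10::Device target = placement.getMappedDevice(c10::Device("cuda:0"));
  std::cout << placement << "\n"; // e.g. "cuda:0|cuda:1,|cpu"

  // Fully qualified to avoid confusion with any isSameDevice in the torch
  // namespace, as the summary notes.
  std::cout << std::boolalpha
            << torch::nativert::isSameDevice(target, c10::Device("cuda:1"))
            << "\n"; // true
  return 0;
}
```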
@@ -590,6 +590,8 @@ libtorch_core_jit_sources = sorted(jit_sources_full)

libtorch_nativert_sources = [
    "torch/nativert/graph/TensorMeta.cpp",
    "torch/nativert/executor/Placement.cpp",
    "torch/nativert/executor/PlacementUtils.cpp",
]

torch_mobile_tracer_sources = [
@@ -6,6 +6,7 @@ file(GLOB_RECURSE NATIVERT_ALL_TEST_FILES "${NATIVERT_TEST_ROOT}/test_*.cpp")

set(NATIVERT_TEST_SRCS
  ${NATIVERT_ALL_TEST_FILES}
  ${TORCH_ROOT}/torch/nativert/graph/TensorMeta.cpp
  ${TORCH_ROOT}/torch/nativert/executor/PlacementUtils.cpp
)

add_executable(test_nativert

@@ -19,6 +20,7 @@ target_compile_definitions(test_nativert PRIVATE USE_GTEST)

set(NATIVERT_TEST_DEPENDENCIES torch gtest)

target_link_libraries(test_nativert PRIVATE ${NATIVERT_TEST_DEPENDENCIES})
target_link_libraries(test_nativert PRIVATE fmt::fmt-header-only)
target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE})

if(USE_CUDA)
test/cpp/nativert/test_placement.cpp (new file, 104 lines)
@@ -0,0 +1,104 @@
#include <c10/core/Device.h>
#include <gtest/gtest.h>
#include <unordered_map>

#include <torch/nativert/executor/Placement.h>

using namespace ::testing;

namespace torch::nativert {
TEST(PlacementTest, NormalizeDevice) {
  c10::Device cpuDevice = c10::Device(c10::DeviceType::CPU);
  c10::Device cpuDevice1 = c10::Device(c10::DeviceType::CPU);
  cpuDevice1.set_index(1);

  EXPECT_EQ(normalizeDevice(cpuDevice), cpuDevice);
  EXPECT_NE(normalizeDevice(cpuDevice1), cpuDevice1);

  c10::Device cudaDevice = c10::Device(c10::DeviceType::CUDA);
  c10::Device cudaDevice1 = c10::Device(c10::DeviceType::CUDA, 1);
  EXPECT_EQ(normalizeDevice(cudaDevice), c10::Device(c10::DeviceType::CUDA, 0));
  EXPECT_EQ(
      normalizeDevice(cudaDevice1), c10::Device(c10::DeviceType::CUDA, 1));

  EXPECT_NE(
      normalizeDevice(cudaDevice1), c10::Device(c10::DeviceType::CUDA, 0));
}

TEST(PlacementTest, IsSameDevice) {
  c10::Device cpuDevice = c10::Device(c10::DeviceType::CPU);
  c10::Device cpuDevice1 = c10::Device(c10::DeviceType::CPU);
  cpuDevice1.set_index(1);

  EXPECT_TRUE(isSameDevice(cpuDevice, cpuDevice));
  EXPECT_TRUE(isSameDevice(cpuDevice, cpuDevice1));

  c10::Device cudaDevice = c10::Device(c10::DeviceType::CUDA);
  c10::Device cudaDevice0 = c10::Device(c10::DeviceType::CUDA, 0);
  c10::Device cudaDevice1 = c10::Device(c10::DeviceType::CUDA, 1);
  EXPECT_TRUE(isSameDevice(cudaDevice, cudaDevice0));
  EXPECT_FALSE(isSameDevice(cudaDevice0, cudaDevice1));

  EXPECT_FALSE(isSameDevice(cudaDevice0, cpuDevice));
}

TEST(PlacementTest, PlacementDefaultOnly) {
  Placement placement(c10::Device(c10::DeviceType::CUDA, 0));

  std::ostringstream os;
  os << placement;
  EXPECT_EQ(os.str(), "|cuda:0");

  c10::Device cuda0 = c10::Device(c10::DeviceType::CUDA, 0);
  c10::Device cuda1 = c10::Device(c10::DeviceType::CUDA, 1);
  c10::Device cuda2 = c10::Device(c10::DeviceType::CUDA, 2);

  EXPECT_EQ(placement.getMappedDevice(cuda0), cuda0);
  EXPECT_EQ(placement.getMappedDevice(cuda1), cuda0);
  EXPECT_EQ(placement.getMappedDevice(cuda2), cuda0);
}

TEST(PlacementTest, PlacementBasic) {
  Placement placement(
      {{c10::Device(c10::DeviceType::CPU), c10::Device(c10::DeviceType::CPU)},
       {c10::Device(c10::DeviceType::CUDA, 0),
        c10::Device(c10::DeviceType::CUDA, 1)},
       {c10::Device(c10::DeviceType::CUDA, 1),
        c10::Device(c10::DeviceType::CUDA, 2)}},
      c10::Device(c10::DeviceType::CUDA, 0));

  std::ostringstream os;
  os << placement;
  EXPECT_EQ(os.str(), "cpu|cpu,cuda:0|cuda:1,cuda:1|cuda:2,|cuda:0");

  c10::Device cpu = c10::Device(c10::DeviceType::CPU);
  c10::Device cuda0 = c10::Device(c10::DeviceType::CUDA, 0);
  c10::Device cuda1 = c10::Device(c10::DeviceType::CUDA, 1);
  c10::Device cuda2 = c10::Device(c10::DeviceType::CUDA, 2);
  c10::Device cuda3 = c10::Device(c10::DeviceType::CUDA, 3);

  EXPECT_EQ(placement.getMappedDevice(cpu), cpu);
  EXPECT_EQ(placement.getMappedDevice(cuda0), cuda1);
  EXPECT_EQ(placement.getMappedDevice(cuda1), cuda2);
  EXPECT_EQ(placement.getMappedDevice(cuda2), cuda0);
  EXPECT_EQ(placement.getMappedDevice(cuda3), cuda0);
}

TEST(PlacementTest, Placement) {
  std::unordered_map<c10::Device, c10::Device> deviceMap1 = {
      {c10::Device("cuda:0"), c10::Device("cuda:1")}};
  Placement p1(deviceMap1);
  EXPECT_EQ(p1.getMappedDevice(c10::Device("cpu")), c10::Device("cpu"));
  EXPECT_EQ(p1.getMappedDevice(c10::Device("cuda")), c10::Device("cuda:1"));
  EXPECT_EQ(p1.getMappedDevice(c10::Device("cuda:0")), c10::Device("cuda:1"));

  std::unordered_map<c10::Device, c10::Device> deviceMap2 = {
      {c10::Device("cpu"), c10::Device("cuda")}};
  Placement p2(deviceMap2);
  EXPECT_EQ(p2.getMappedDevice(c10::Device("cpu")), c10::Device("cuda:0"));
  EXPECT_EQ(p2.getMappedDevice(c10::Device("cuda:0")), c10::Device("cuda:0"));
  EXPECT_EQ(p2.getMappedDevice(c10::Device("cuda:1")), c10::Device("cuda:1"));
}

} // namespace torch::nativert
torch/nativert/executor/Placement.cpp (new file, 61 lines)
@@ -0,0 +1,61 @@
#include <torch/nativert/executor/Placement.h>

#include <fmt/ostream.h>
#include <ostream>

namespace torch::nativert {

std::ostream& operator<<(std::ostream& os, const Placement& placement) {
  std::vector<std::pair<std::string, c10::Device>> sorted_keys;
  sorted_keys.reserve(placement.deviceMap_.size());
  for (const auto& pair : placement.deviceMap_) {
    sorted_keys.emplace_back(pair.first.str(), pair.first);
  }
  std::sort(
      sorted_keys.begin(), sorted_keys.end(), [](const auto& a, const auto& b) {
        return a.first < b.first;
      });

  bool first = true;
  for (const auto& pair : sorted_keys) {
    if (!first) {
      fmt::print(os, ",");
    }
    first = false;
    const auto& key = pair.second;
    const auto& value = placement.deviceMap_.at(key);
    fmt::print(os, "{}|{}", pair.first, value.str());
  }
  if (placement.defaultDevice_.has_value()) {
    fmt::print(os, "{}|{}", first ? "" : ",", placement.defaultDevice_->str());
  }
  return os;
}

Placement::Placement(std::optional<c10::Device> defaultDevice)
    : Placement({}, defaultDevice) {}

Placement::Placement(
    const std::unordered_map<c10::Device, c10::Device>& deviceMap,
    std::optional<c10::Device> defaultDevice) {
  for (const auto& [srcDevice, dstDevice] : deviceMap) {
    deviceMap_.try_emplace(
        normalizeDevice(srcDevice), normalizeDevice(dstDevice));
  }
  if (defaultDevice.has_value()) {
    defaultDevice_ = normalizeDevice(defaultDevice.value());
  }
}

c10::Device Placement::getMappedDevice(const c10::Device& srcDevice) const {
  auto it = deviceMap_.find(normalizeDevice(srcDevice));
  if (it != deviceMap_.end()) {
    return it->second;
  }
  if (defaultDevice_.has_value()) {
    return defaultDevice_.value();
  }
  return srcDevice;
}

} // namespace torch::nativert
torch/nativert/executor/Placement.h (new file, 57 lines)
@@ -0,0 +1,57 @@
#pragma once

#include <c10/core/Device.h>
#include <c10/util/Logging.h>

#include <optional>
#include <unordered_map>

namespace torch::nativert {

/**
 * This function returns a normalized version of the input device:
 * - For CPU devices, the returned device will have no index (i.e., the default
 *   CPU device).
 * - For CUDA devices, if no index is specified, index 0 is assumed.
 * - For other device types, the function will raise an error.
 *
 * @param device The input c10::Device to normalize.
 * @return A normalized c10::Device with standardized indexing.
 *
 * @throws c10::Error If the device type is not CPU or CUDA.
 */

c10::Device normalizeDevice(const c10::Device& device);

/**
 * Returns true if the two devices are the same and have the same device index
 * (if CUDA).
 */
bool isSameDevice(const c10::Device& device1, const c10::Device& device2);

/**
 * @brief A utility class for managing device placement mappings.
 *
 * The Placement class provides a way to map source devices to target devices.
 * It supports both explicit per-device mappings and a default device fallback.
 * This is the argument taken by NativeRT to map devices in the model artifact
 * to the devices the model should run on.
 */
struct TORCH_API Placement {
  Placement() = default;
  explicit Placement(std::optional<c10::Device> defaultDevice);
  explicit Placement(
      const std::unordered_map<c10::Device, c10::Device>& deviceMap,
      std::optional<c10::Device> defaultDevice = std::nullopt);
  c10::Device getMappedDevice(const c10::Device& srcDevice) const;

  TORCH_API friend std::ostream& operator<<(
      std::ostream& os,
      const Placement& obj);

 protected:
  std::unordered_map<c10::Device, c10::Device> deviceMap_;
  std::optional<c10::Device> defaultDevice_;
};

} // namespace torch::nativert
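To make the header's normalization and fallback semantics concrete, here is a hypothetical helper (not part of this PR) that an executor could use to assert a value landed on the device the Placement chose; the name `checkPlacement` and its signature are purely illustrative.

```cpp
// Hypothetical helper, not part of this commit.
#include <torch/nativert/executor/Placement.h>

#include <c10/core/Device.h>
#include <c10/util/Exception.h>

namespace {

void checkPlacement(
    const torch::nativert::Placement& placement,
    const c10::Device& artifactDevice,
    const c10::Device& actualDevice) {
  const c10::Device expected = placement.getMappedDevice(artifactDevice);
  // isSameDevice treats "cuda" (no index) and "cuda:0" as the same device, so
  // the check is robust to unnormalized inputs.
  TORCH_CHECK(
      torch::nativert::isSameDevice(expected, actualDevice),
      "expected value on ",
      expected.str(),
      " but found it on ",
      actualDevice.str());
}

} // namespace
```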
torch/nativert/executor/PlacementUtils.cpp (new file, 37 lines)
@@ -0,0 +1,37 @@
#include <torch/nativert/executor/Placement.h>

#include <fmt/ostream.h>

namespace torch::nativert {

c10::Device normalizeDevice(const c10::Device& device) {
  // A cpu device doesn't have an index;
  // a cuda device must have an index.
  if (device.is_cpu()) {
    return c10::Device(c10::DeviceType::CPU);
  } else if (device.is_cuda()) {
    return c10::Device(
        c10::DeviceType::CUDA,
        device.has_index() ? device.index() : static_cast<c10::DeviceIndex>(0));
  } else {
    TORCH_CHECK(false, "Unsupported device type", device);
  }
}

bool isSameDevice(const c10::Device& a, const c10::Device& b) {
  if (a.is_cpu()) {
    return b.is_cpu();
  }
  if (a.is_cuda()) {
    if (b.is_cuda()) {
      auto aIndex = a.has_index() ? a.index() : 0;
      auto bIndex = b.has_index() ? b.index() : 0;
      return aIndex == bIndex;
    } else {
      return false;
    }
  }
  TORCH_CHECK(false, "Unsupported device type", a, " and ", b);
  return false;
}
} // namespace torch::nativert