[PT] expose FlightRecorder API for building (#154866)

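Summary: As titled — annotate the FlightRecorder member functions getTraceback, record_pg_ranks, getEntry, and retire_id with TORCH_API.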

Test Plan:
CI

Rollback Plan:

Differential Revision: D75803611

Pull Request resolved: https://github.com/pytorch/pytorch/pull/154866
Approved by: https://github.com/fduwjj, https://github.com/d4l3k
This commit is contained in:
Feng Tian
2025-06-04 01:25:48 +00:00
committed by PyTorch MergeBot
parent d8e4c1c363
commit e2760544fa

View File

@ -162,7 +162,7 @@ struct FlightRecorder {
// acquire the GIL. If you don't want to block the current thread or take
// the risk of a GIL deadlock, you can use an asynchronous calling mechanism
// like std::async.
std::string getTraceback();
TORCH_API std::string getTraceback();
};
bool enabled_ = false;
@ -192,7 +192,7 @@ struct FlightRecorder {
std::shared_ptr<ProcessGroupStatus> pg_status,
bool isP2P);
void record_pg_ranks(
TORCH_API void record_pg_ranks(
const std::tuple<std::string, std::string>& pg_name,
std::vector<uint64_t> ranks);
@ -204,7 +204,7 @@ struct FlightRecorder {
// Returns the entry with the given id, if it exists. Otherwise, returns
// std::nullopt.
std::optional<Entry> getEntry(std::optional<size_t> id);
TORCH_API std::optional<Entry> getEntry(std::optional<size_t> id);
/*
Mark an Event as completed and free its events.
@ -216,7 +216,9 @@ struct FlightRecorder {
never hang. (timing must also be enabled for compute_duration - see
TORCH_NCCL_ENABLE_TIMING).
*/
void retire_id(std::optional<size_t> id, bool compute_duration = true);
TORCH_API void retire_id(
std::optional<size_t> id,
bool compute_duration = true);
const c10::List<c10::IValue> getCollectiveTrace(
bool includeStacktraces,