Add support for backends to register a reducer timer

Currently, reducer timer registration is expected
for all backends by default; if no timer is
registered, set_runtime_stats_and_log() fails an
internal assert.

To let other backends register their own reducer
timer, the timer registration is moved into a
separate header, decoupling it from the internal
reducer interface.
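For context, a rough sketch of the lookup that trips that assert. The real call site is in reducer.cpp, which this diff does not touch, so the names below are approximate, not verbatim:

    // Sketch only: when runtime stats logging is enabled, the reducer
    // asks the registry for a timer matching its device type.
    std::unique_ptr<Timer> timer =
        TimerRegistry()->Create(device.type(), device);
    // c10::Registry::Create() returns nullptr for an unregistered key,
    // so a backend that never registered a Timer fails the internal
    // assert when set_runtime_stats_and_log() needs the timer.
    TORCH_INTERNAL_ASSERT(timer != nullptr);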

Signed-off-by: Jeeja <jeejakp@habana.ai>

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/71700
Approved by: https://github.com/rohan-varma
Jeeja
2022-03-24 21:52:25 +00:00
committed by PyTorch MergeBot
parent 5fbe8b1966
commit 13ebcf3723
3 changed files with 77 additions and 70 deletions

c10d/reducer.hpp

@@ -14,6 +14,7 @@
 #include <c10d/Utils.hpp>
 #include <c10d/comm.hpp>
 #include <c10d/debug.h>
+#include <c10d/reducer_timer.hpp>
 #include <c10d/default_comm_hooks.hpp>
 #include <torch/csrc/autograd/function.h>
 #include <torch/csrc/autograd/profiler.h>
@@ -28,77 +29,10 @@ constexpr int kDefaultFirstBucketBytes = int(1024 * 1024);
 constexpr int kDefaultBucketBytesCap = int(25 * 1024 * 1024);
 // Collect runtime stats once for every kDDPRuntimeLoggingSampleRate iterations.
 constexpr int kDDPRuntimeLoggingSampleRate = 100;
-constexpr int kUnsetTime = -1;
-inline int64_t current_time_in_nanos() {
-  return torch::profiler::impl::getTime();
-}
 // Forward declaration
 class Logger;
-class TORCH_API Timer {
- private:
-  // The timestamp of forward call start time in each iteration.
-  int64_t forward_start_time = kUnsetTime;
-  // The timestamp of backward computation start and end time in each
-  // iteration.
-  int64_t backward_compute_start_time = kUnsetTime;
-  int64_t backward_compute_end_time = kUnsetTime;
-  // The timestamp of first communication call start time in each iteration.
-  int64_t backward_comm_start_time = kUnsetTime;
-  // The timestamp of last communication call end time in each iteration.
-  int64_t backward_comm_end_time = kUnsetTime;
- public:
-  enum class Event {
-    kForwardStart,
-    kBackwardComputeStart,
-    kBackwardComputeEnd,
-    kBackwardCommStart,
-    kBackwardCommEnd,
-  };
-  // Record the current event, i.e., mark it as having occurred now. Default
-  // CPU implementation.
-  virtual void record(Event event) {
-    getTimeRef(event) = current_time_in_nanos();
-  }
-  // Return the difference between when two events occurred, in nanoseconds.
-  // Or nullopt if one of them hasn't been recorded.
-  virtual c10::optional<int64_t> measureDifference(Event start, Event end) = 0;
-  virtual ~Timer() = default;
-  // Return host-side timestamp, or nullopt if it has not yet been recorded.
-  c10::optional<int64_t> getTimestamp(Event event) {
-    auto time = getTimeRef(event);
-    if (time == kUnsetTime) {
-      return c10::nullopt;
-    } else {
-      return time;
-    }
-  }
-  // Return host-side time member variable corresponding to the given event.
-  int64_t& getTimeRef(Event event) {
-    switch (event) {
-      case Event::kForwardStart:
-        return forward_start_time;
-      case Event::kBackwardComputeStart:
-        return backward_compute_start_time;
-      case Event::kBackwardComputeEnd:
-        return backward_compute_end_time;
-      case Event::kBackwardCommStart:
-        return backward_comm_start_time;
-      case Event::kBackwardCommEnd:
-        return backward_comm_end_time;
-      default:
-        TORCH_INTERNAL_ASSERT(false);
-    }
-  }
-};
 // Local accumulator type for a single bucket.
 struct BucketAccumulator {
   std::vector<size_t> indices;
@@ -106,8 +40,6 @@ struct BucketAccumulator {
   size_t size_limit = 0;
 };
-C10_DECLARE_TYPED_REGISTRY(TimerRegistry, c10::DeviceType, Timer, std::unique_ptr, c10::Device);
 class TORCH_API Reducer {
  public:
   // The constructor takes a list of variables (i.e. parameters) for this

c10d/reducer_cuda.cpp

@@ -1,4 +1,4 @@
-#include <c10d/reducer.hpp>
+#include <c10d/reducer_timer.hpp>
 #include <c10/core/DeviceGuard.h>
 #include <ATen/cuda/CUDAEvent.h>

c10d/reducer_timer.hpp

@@ -0,0 +1,75 @@
+#pragma once
+#include <torch/csrc/autograd/profiler.h>
+namespace c10d {
+constexpr int kUnsetTime = -1;
+inline int64_t current_time_in_nanos() {
+  return torch::profiler::impl::getTime();
+}
+class TORCH_API Timer {
+ private:
+  // The timestamp of forward call start time in each iteration.
+  int64_t forward_start_time = kUnsetTime;
+  // The timestamp of backward computation start and end time in each
+  // iteration.
+  int64_t backward_compute_start_time = kUnsetTime;
+  int64_t backward_compute_end_time = kUnsetTime;
+  // The timestamp of first communication call start time in each iteration.
+  int64_t backward_comm_start_time = kUnsetTime;
+  // The timestamp of last communication call end time in each iteration.
+  int64_t backward_comm_end_time = kUnsetTime;
+ public:
+  enum class Event {
+    kForwardStart,
+    kBackwardComputeStart,
+    kBackwardComputeEnd,
+    kBackwardCommStart,
+    kBackwardCommEnd,
+  };
+  // Record the current event, i.e., mark it as having occurred now. Default
+  // CPU implementation.
+  virtual void record(Event event) {
+    getTimeRef(event) = current_time_in_nanos();
+  }
+  // Return the difference between when two events occurred, in nanoseconds.
+  // Or nullopt if one of them hasn't been recorded.
+  virtual c10::optional<int64_t> measureDifference(Event start, Event end) = 0;
+  virtual ~Timer() = default;
+  // Return host-side timestamp, or nullopt if it has not yet been recorded.
+  c10::optional<int64_t> getTimestamp(Event event) {
+    auto time = getTimeRef(event);
+    if (time == kUnsetTime) {
+      return c10::nullopt;
+    } else {
+      return time;
+    }
+  }
+  // Return host-side time member variable corresponding to the given event.
+  int64_t& getTimeRef(Event event) {
+    switch (event) {
+      case Event::kForwardStart:
+        return forward_start_time;
+      case Event::kBackwardComputeStart:
+        return backward_compute_start_time;
+      case Event::kBackwardComputeEnd:
+        return backward_compute_end_time;
+      case Event::kBackwardCommStart:
+        return backward_comm_start_time;
+      case Event::kBackwardCommEnd:
+        return backward_comm_end_time;
+      default:
+        TORCH_INTERNAL_ASSERT(false);
+    }
+  }
+};
+C10_DECLARE_TYPED_REGISTRY(TimerRegistry, c10::DeviceType, Timer, std::unique_ptr, c10::Device);
+} // namespace c10d
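With the registry declared in this standalone header, a backend can supply its own Timer without touching reducer.hpp. A minimal sketch of such a translation unit; MyBackendTimer and the c10::kXLA key are illustrative only, not part of this change:

    #include <c10d/reducer_timer.hpp>

    namespace c10d {
    namespace {

    class MyBackendTimer : public Timer {
     public:
      // The registry passes in the c10::Device the timer is created for.
      explicit MyBackendTimer(c10::Device /* device */) {}

      // Keep the base class's host-side record(); report elapsed time as
      // the difference between the two recorded host timestamps.
      c10::optional<int64_t> measureDifference(Event start, Event end) override {
        c10::optional<int64_t> start_time = getTimestamp(start);
        c10::optional<int64_t> end_time = getTimestamp(end);
        if (!start_time || !end_time || *end_time <= *start_time) {
          return c10::nullopt;
        }
        return *end_time - *start_time;
      }
    };

    // Hypothetical key: a real backend registers under its own DeviceType,
    // the same way the CUDA build registers its timer for c10::kCUDA.
    C10_REGISTER_TYPED_CLASS(TimerRegistry, c10::kXLA, MyBackendTimer);

    } // namespace
    } // namespace c10d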