mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
[wall_clock_breakdown] always log stats when enabled (#7617)
Currently, when the main logger is at WARN level, `wall_clock_breakdown: true` never logs anything, which is wrong because it silently disables functionality that is at times crucial. There also seems to be a disconnect somewhere, since the recently added `--log_level` flag doesn't appear to change this logger's level. The future plan is to support different log levels for different modules, but for now just use `print` when `wall_clock_breakdown` is `True`, so that this functionality does not depend on the log level. `print` is also less noisy than the logger: the logger's long prefix is of no value to the user here, since we print stats rather than code-related logs, so the printed results are easier to digest. Signed-off-by: Stas Bekman <stas@stason.org>
This commit is contained in:
@ -83,7 +83,7 @@ def print_configuration(args, name):
|
|||||||
logger.info(" {} {} {}".format(arg, dots, getattr(args, arg)))
|
logger.info(" {} {} {}".format(arg, dots, getattr(args, arg)))
|
||||||
|
|
||||||
|
|
||||||
def log_dist(message, ranks=None, level=logging.INFO):
|
def log_dist(message, ranks=None, level=logging.INFO, use_logger=True):
|
||||||
from deepspeed import comm as dist
|
from deepspeed import comm as dist
|
||||||
"""Log message when one of following condition meets
|
"""Log message when one of following condition meets
|
||||||
|
|
||||||
@ -94,6 +94,7 @@ def log_dist(message, ranks=None, level=logging.INFO):
|
|||||||
message (str)
|
message (str)
|
||||||
ranks (list)
|
ranks (list)
|
||||||
level (int)
|
level (int)
|
||||||
|
use_logger (bool): if `False`, ignore the log level and always print the message
|
||||||
|
|
||||||
"""
|
"""
|
||||||
should_log = not dist.is_initialized()
|
should_log = not dist.is_initialized()
|
||||||
@ -104,7 +105,10 @@ def log_dist(message, ranks=None, level=logging.INFO):
|
|||||||
should_log = should_log or (my_rank in set(ranks))
|
should_log = should_log or (my_rank in set(ranks))
|
||||||
if should_log:
|
if should_log:
|
||||||
final_message = "[Rank {}] {}".format(my_rank, message)
|
final_message = "[Rank {}] {}".format(my_rank, message)
|
||||||
logger.log(level, final_message)
|
if use_logger:
|
||||||
|
logger.log(level, final_message)
|
||||||
|
else:
|
||||||
|
print(final_message)
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache(None)
|
@functools.lru_cache(None)
|
||||||
|
@ -148,7 +148,8 @@ class SynchronizedWallClockTimer:
|
|||||||
elapsed_time = (self.timers[name].elapsed(reset=reset) / normalizer)
|
elapsed_time = (self.timers[name].elapsed(reset=reset) / normalizer)
|
||||||
string += " | {}: {:.2f}".format(name, elapsed_time)
|
string += " | {}: {:.2f}".format(name, elapsed_time)
|
||||||
|
|
||||||
log_dist(string, ranks=ranks or [0])
|
# Timer logging should be independent of the global log level: it is already conditional on wall_clock_breakdown being True, so passing use_logger=False always prints the stats.
|
||||||
|
log_dist(string, ranks=ranks or [0], use_logger=False)
|
||||||
|
|
||||||
def get_mean(self, names, normalizer=1.0, reset=True):
|
def get_mean(self, names, normalizer=1.0, reset=True):
|
||||||
"""Get the mean of a group of timers."""
|
"""Get the mean of a group of timers."""
|
||||||
|
Reference in New Issue
Block a user