Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
Convert logging f-strings to use % format, part five (#98765)
This does some annoying but simple cases by hand.

Signed-off-by: Edward Z. Yang <ezyang@meta.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/98765
Approved by: https://github.com/wanchaol
commit b8b840be3d
parent 5a7aad9681
committed by: PyTorch MergeBot
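Editor's note on the pattern applied throughout this commit: an f-string builds the message eagerly at the call site, even when the logger's level would discard the record, while %-style placeholders are filled in lazily by the logging module only if the record is actually emitted, and the constant template lets handlers and log aggregators group identical messages. A minimal before/after sketch of the first hunk below; `mode` is an illustrative stand-in for the real variable:

import logging

log = logging.getLogger(__name__)
mode = "eager"  # illustrative stand-in for the benchmark backend name

try:
    raise RuntimeError("boom")  # stand-in for a failing warmup
except Exception:
    # Before: the f-string is rendered even if this logger is disabled.
    log.exception(f"Backend {mode} failed in warmup()")  # noqa: G004
    # After: logging interpolates %s lazily, only when the record is emitted.
    log.exception("Backend %s failed in warmup()", mode)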
@@ -1393,7 +1393,7 @@ class BenchmarkRunner:
             percentage = psutil.Process(os.getpid()).memory_percent()
             peak_mem = percentage * total / 10**9
         except Exception:
-            log.exception(f"Backend {mode} failed in warmup()")
+            log.exception("Backend %s failed in warmup()", mode)
             return sys.exit(-1)
         dynamo_stats = get_dynamo_stats()
         dynamo_stats.subtract(start_stats)
@@ -98,7 +98,6 @@ coverage_ignore_functions = [
     "cudart",
     "is_bf16_supported",
     # torch.cuda._sanitizer
-    "format_log_message",
     "zip_arguments",
     "zip_by_key",
     # torch.distributed.autograd
@@ -61,7 +61,7 @@ def main():
             utilization, mm_conv_utilization = compute_utilization(filenames, total_length)
             print(f"{modelname}, {utilization}, {mm_conv_utilization}")
         except BaseException:
-            logging.exception(f"{filename}, ERROR")
+            logging.exception("%s, ERROR", filename)
             print(f"{filename}, ERROR")

 if __name__ == "__main__":
@@ -7,7 +7,6 @@ import platform
 import stat
 import subprocess
 import sys
-import textwrap
 import urllib.error
 import urllib.request
 from pathlib import Path
@@ -77,16 +76,17 @@ def check(binary_path: Path, reference_hash: str) -> bool:
         return True

     logging.warning(
-        textwrap.dedent(
-            f"""\
-            Found binary hash does not match reference!
+        """\
+Found binary hash does not match reference!

-            Found hash: {existing_binary_hash}
-            Reference hash: {reference_hash}
+Found hash: %s
+Reference hash: %s

-            Deleting {binary_path} just to be safe.
-            """
-        )
+Deleting %s just to be safe.
+""",
+        existing_binary_hash,
+        reference_hash,
+        binary_path,
     )
     if DRY_RUN:
         logging.critical(
@@ -784,11 +784,11 @@ class InstructionTranslatorBase(Checkpointable[InstructionTranslatorGraphState])
         if package is not None:
             if spec is not None and package != spec.parent:
                 log.warning(
-                    "__package__ != __spec__.parent "
-                    f"({package!r} != {spec.parent!r})",
-                    ImportWarning,
+                    "__package__ != __spec__.parent (%r != %r)",
+                    package,
+                    spec.parent,
                     stacklevel=3,
-                )  # type: ignore[call-arg]
+                )
             return package
         elif spec is not None:
             return spec.parent
@@ -321,7 +321,7 @@ def write_record_to_file(filename, exec_record):
         with open(filename, "wb") as f:
             exec_record.dump(f)
     except Exception:
-        log.error(f"Unable to write execution record {filename}", exc_info=1)
+        log.error("Unable to write execution record %s", filename, exc_info=1)


 def count_calls(g: fx.Graph):
@@ -491,7 +491,7 @@ class _NnapiSerializer:
             raise Exception("Flexible size is not supported for this operand.")
         if s < 0:
             # runtime flex
-            LOG.warning(f"Operand {oper} has runtime flex shape")
+            LOG.warning("Operand %s has runtime flex shape", oper)
         return op_id, oper

     def get_tensor_operand_or_constant(self, jitval, dim_order=DimOrder.PRESUMED_CONTIGUOUS):
@@ -332,10 +332,10 @@ or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib6
         if find_je:
             logger.info("Use JeMalloc memory allocator")
             return
-        logger.warning(f"""Neither TCMalloc nor JeMalloc is found in $CONDA_PREFIX/lib or $VIRTUAL_ENV/lib
+        logger.warning("""Neither TCMalloc nor JeMalloc is found in $CONDA_PREFIX/lib or $VIRTUAL_ENV/lib
 or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib64 or
-{expanduser("~")}/.local/lib/ so the LD_PRELOAD environment variable will not be set.
-This may drop the performance""")
+%s/.local/lib/ so the LD_PRELOAD environment variable will not be set.
+This may drop the performance""", expanduser("~"))

     def log_env_var(self, env_var_name=""):
         if env_var_name in os.environ:
@@ -142,10 +142,6 @@ class CUDASanitizerErrors(Exception):
         return f"detected {len(self.errors)} errors"


-def format_log_message(message: str) -> str:
-    return " ".join(line.strip() for line in message.strip().splitlines())
-
-
 @dataclass
 class TensorInfo:
     r"""Stores information about a single tensor and recent accesses to it.
@@ -169,27 +165,21 @@ class _TensorsAccessed:
     def ensure_tensor_exists(self, data_ptr: DataPtr) -> None:
         if data_ptr not in self.accesses:
             logger.info(
-                format_log_message(
-                    f"""
-                    Found tensor with pointer: {data_ptr}, but no matching tensor
-                    allocation in the trace. Backfilling the trace now.
-                    Perhaps the sanitizer was enabled after some torch operations?
-                    """
-                )
+                "Found tensor with pointer: %s, but no matching tensor "
+                "allocation in the trace. Backfilling the trace now. "
+                "Perhaps the sanitizer was enabled after some torch operations?",
+                data_ptr
             )
             self.create_tensor(data_ptr, None)

     def ensure_tensor_does_not_exist(self, data_ptr: DataPtr) -> None:
         if data_ptr in self.accesses:
             logger.info(
-                format_log_message(
-                    f"""
-                    Found duplicate tensor allocation in the trace for tensor with
-                    pointer: {data_ptr}. Assuming the trace for tensor deallocation
-                    wasn't caught and backfilling it now.
-                    Perhaps the sanitizer was enabled after some torch operations?
-                    """
-                )
+                "Found duplicate tensor allocation in the trace for tensor with "
+                "pointer: %s. Assuming the trace for tensor deallocation "
+                "wasn't caught and backfilling it now. "
+                "Perhaps the sanitizer was enabled after some torch operations?",
+                data_ptr
             )
             self.delete_tensor(data_ptr)

@@ -233,53 +223,41 @@ class StreamSynchronizations:
     def _ensure_stream_exists(self, stream: StreamId) -> None:
         if stream not in self.current_sync_states:
             logger.info(
-                format_log_message(
-                    f"""
-                    Found Stream with id: {stream}, but no matching stream
-                    creation in the trace. Backfilling the trace now.
-                    Perhaps the sanitizer was enabled after some torch operations?
-                    """
-                )
+                "Found Stream with id: %s, but no matching stream "
+                "creation in the trace. Backfilling the trace now. "
+                "Perhaps the sanitizer was enabled after some torch operations?",
+                stream
             )
             self.create_stream(stream)

     def _ensure_event_exists(self, event: EventId) -> None:
         if event not in self.recorded_sync_states:
             logger.info(
-                format_log_message(
-                    f"""
-                    Found Event with id: {event}, but no matching event
-                    creation in the trace. Backfilling the trace now.
-                    Perhaps the sanitizer was enabled after some torch operations?
-                    """
-                )
+                "Found Event with id: %s, but no matching event "
+                "creation in the trace. Backfilling the trace now. "
+                "Perhaps the sanitizer was enabled after some torch operations?",
+                event
            )
             self.create_event(event)

     def _ensure_event_does_not_exist(self, event: EventId) -> None:
         if event in self.recorded_sync_states:
             logger.info(
-                format_log_message(
-                    f"""
-                    Found duplicate event creation in the trace for event with
-                    id: {event}. Assuming the trace for event deletion wasn't caught
-                    and backfilling it now.
-                    Perhaps the sanitizer was enabled after some torch operations?
-                    """
-                )
+                "Found duplicate event creation in the trace for event with "
+                "id: %s. Assuming the trace for event deletion wasn't caught "
+                "and backfilling it now. "
+                "Perhaps the sanitizer was enabled after some torch operations?",
+                event
             )
             self.delete_event(event)

     def create_stream(self, stream: StreamId) -> None:
         if stream in self.current_sync_states:
             logger.info(
-                format_log_message(
-                    f"""
-                    Found duplicate Stream creation in the trace for Stream with
-                    id: {stream}. PyTorch Streams are only created once, so this
-                    trace entry is ignored.
-                    """
-                )
+                "Found duplicate Stream creation in the trace for Stream with "
+                "id: %s. PyTorch Streams are only created once, so this "
+                "trace entry is ignored.",
+                stream
             )
         else:
             self.host_sync_state[stream] = 0
@@ -415,7 +415,7 @@ class ElasticAgent(abc.ABC):
         if group_result.is_failed():
             # workers failed
             failure = group_result.failures[0]
-            log.exception(f"worker 0 failed with exit code : {failure.exit_code}")
+            log.exception("worker 0 failed with exit code : %s", failure.exit_code)
         else:
             return group_result.return_values[0]  # return rank 0's results

@@ -949,5 +949,6 @@ class SimpleElasticAgent(ElasticAgent):
             raise
         except Exception:
             log.exception(
-                f"Error waiting on exit barrier. Elapsed: {time.time() - start} seconds"
+                "Error waiting on exit barrier. Elapsed: %s seconds",
+                time.time() - start
             )
@@ -491,9 +491,12 @@ class MultiprocessContext(PContext):
             error_filepath = self.error_files[failed_local_rank]

             log.error(
-                f"failed (exitcode: {failed_proc.exitcode})"
-                f" local_rank: {failed_local_rank} (pid: {e.pid})"
-                f" of fn: {fn_name} (start_method: {self.start_method})",
+                "failed (exitcode: %s)"
+                " local_rank: %s (pid: %s)"
+                " of fn: %s (start_method: %s)",
+                failed_proc.exitcode,
+                failed_local_rank, e.pid,
+                fn_name, self.start_method,
                 exc_info=True,
             )

@@ -118,7 +118,7 @@ class ProcessFailure:
                     self.error_file_data
                 )
             except Exception:
-                log.exception(f"Failed to parse reply file: {self.error_file}")
+                log.exception("Failed to parse reply file: %s", self.error_file)
                 raise
         else:
             self._set_no_reply_file()
@@ -351,9 +351,10 @@ def record(
         else:
             log.info(
                 (
-                    f"local_rank {rank} FAILED with no error file."
-                    f" Decorate your entrypoint fn with @record for traceback info."
-                    f" See: https://pytorch.org/docs/stable/elastic/errors.html"
+                    "local_rank %s FAILED with no error file."
+                    " Decorate your entrypoint fn with @record for traceback info."
+                    " See: https://pytorch.org/docs/stable/elastic/errors.html",
+                    rank
                 )
             )
             raise
@@ -132,8 +132,9 @@ class TailLog:
                 f.result()
             except Exception as e:
                 log.error(
-                    f"error in log tailor for {self._name}{local_rank}."
-                    f" {e.__class__.__qualname__}: {e}",
+                    "error in log tailor for %s%s. %s: %s",
+                    self._name, local_rank,
+                    e.__class__.__qualname__, e,
                 )

         if self._threadpool:
@@ -329,5 +329,5 @@ class FileTimerServer:
             log.info("Process with pid=%s does not exist. Skipping", worker_pid)
             return True
         except Exception as e:
-            log.error(f"Error terminating pid={worker_pid}", exc_info=e)
+            log.error("Error terminating pid=%s", worker_pid, exc_info=e)
             return False
@@ -121,5 +121,5 @@ class LocalTimerServer(TimerServer):
             log.info("Process with pid=%s does not exist. Skipping", worker_id)
             return True
         except Exception as e:
-            log.error(f"Error terminating pid={worker_id}", exc_info=e)
+            log.error("Error terminating pid=%s", worker_id, exc_info=e)
             return False
@@ -148,10 +148,11 @@ if is_available():
         # Ignore type error because mypy doesn't handle dynamically generated type objects (#4865)
         if backend != BackendType.TENSORPIPE:  # type: ignore[attr-defined]
             logger.warning(
-                f"RPC was initialized with no explicit backend but with options "  # type: ignore[attr-defined]
-                f"corresponding to {backend}, hence that backend will be used "
-                f"instead of the default {BackendType.TENSORPIPE}. To silence this "
-                f"warning pass `backend={backend}` explicitly."
+                "RPC was initialized with no explicit backend but with options "  # type: ignore[attr-defined]
+                "corresponding to %(backend)s, hence that backend will be used "
+                "instead of the default BackendType.TENSORPIPE. To silence this "
+                "warning pass `backend=%(backend)s` explicitly.",
+                {'backend': backend}
             )

         if backend is None:
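Editor's note on the RPC hunk above: because `backend` appears twice in the template, the new code passes a single dict and uses named `%(backend)s` placeholders; logging treats a lone mapping argument as the substitution map and still formats it lazily. A small standalone sketch of that pattern, with an illustrative value:

import logging

logger = logging.getLogger(__name__)
backend = "PROCESS_GROUP"  # illustrative stand-in for the BackendType value

# A single dict argument fills every %(backend)s slot from one value.
logger.warning(
    "RPC was initialized with options corresponding to %(backend)s; "
    "pass `backend=%(backend)s` explicitly to silence this warning.",
    {"backend": backend},
)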
@@ -207,7 +207,7 @@ class CapabilityBasedPartitioner:

         logger.debug("Partitions proposed:")
         for id, partition in partitions_by_id.items():
-            logger.debug(f"partition #{id}", [node.name for node in partition.nodes])
+            logger.debug("partition #%s: %s", id, [node.name for node in partition.nodes])

         return list(partitions_by_id.values())

@@ -280,7 +280,7 @@ class PassManager:
             modified = modified or res.modified

             if isinstance(module, GraphModule):
-                logger.debug(f"Graph after pass '{fn_name}':", module.graph)
+                logger.debug("Graph after pass '%s': %s", fn_name, module.graph)
                 module.recompile()

             # Check graph invariants
@@ -450,7 +450,7 @@ class _MinimizerBase:
         report.append(f"Sequential traverse iteration {self.iteration}.")
         report.append(f"Visit node: {node.name}")

-        _LOGGER.info(f"Visit node: {node.name}")
+        _LOGGER.info("Visit node: %s", node.name)
         cur_nodes: NodeSet = {node}

         if node in self.fusions:
@@ -63,7 +63,7 @@ def _calc___package__(globals):
     if package is not None:
         if spec is not None and package != spec.parent:
             _warnings.warn(  # noqa: G010
-                "__package__ != __spec__.parent " f"({package!r} != {spec.parent!r})",
+                f"__package__ != __spec__.parent ({package!r} != {spec.parent!r})",  # noqa: G004
                 ImportWarning,
                 stacklevel=3,
             )
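Editor's note: the `_calc___package__` hunk above goes the other way. The call is `_warnings.warn`, not a logger method, and `warnings.warn` takes a finished message rather than a template plus lazy arguments, so the two string pieces are merged into one f-string and exempted with `# noqa: G004`. A minimal illustration with made-up values:

import warnings

package, parent = "pkg_a", "pkg_b"  # illustrative mismatched values

# warnings.warn() has no %-style lazy formatting, so the message is
# rendered up front; an f-string is idiomatic here.
warnings.warn(
    f"__package__ != __spec__.parent ({package!r} != {parent!r})",
    ImportWarning,
    stacklevel=3,
)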
@@ -244,8 +244,9 @@ class Trainer:
             if trainer_has_less_inputs:
                 input_batches = batches[: len(batches) // 2]
                 gLogger.info(
-                    f"""Trainer reduced input patches from {len(batches)}
-                    to {len(input_batches)} to simulate uneven inputs."""
+                    "Trainer reduced input patches from %s "
+                    "to %s to simulate uneven inputs.",
+                    len(batches), len(input_batches)
                 )
             else:
                 input_batches = batches