Revert "[BE]: Enable RUFF TRY400 rule - log.exception (#153473)"

This reverts commit 4f4ecc583e0f48ad2d062a53bf91c61ab40b4948.

Reverted https://github.com/pytorch/pytorch/pull/153473 on behalf of https://github.com/jeanschmidt due to seems to have broken internal signals, @albanD may I count on you to help the author merge his PR? D74837988 ([comment](https://github.com/pytorch/pytorch/pull/153473#issuecomment-2886017075))
commit 3443627e07 (parent 86c6f71ddb)
PyTorch MergeBot
2025-05-16 08:29:26 +00:00

23 changed files with 46 additions and 51 deletions
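For context on the rule being reverted: ruff's TRY400 flags `logging.error(...)` calls made inside an `except` block and suggests `logging.exception(...)`, which also logs at ERROR level but appends the active traceback automatically. A minimal sketch of the two patterns this commit toggles between (illustrative code, not taken from the diff; `risky` is a hypothetical stand-in):

```python
import logging

log = logging.getLogger(__name__)


def risky() -> None:
    raise ValueError("boom")


def reverted_style() -> None:
    # The pattern this commit restores: only str(e) is recorded, and the
    # traceback is lost unless exc_info is passed explicitly.
    try:
        risky()
    except Exception as e:
        log.error("risky() failed: %s", e)


def try400_style() -> None:
    # The pattern the reverted PR introduced: logs at ERROR level and
    # appends the current traceback automatically.
    try:
        risky()
    except Exception:
        log.exception("risky() failed")
```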

@@ -16,9 +16,7 @@ ignore =
     # these ignores are from flake8-comprehensions; please fix!
     C407,
     # these ignores are from flake8-logging-format; please fix!
-    G100,G101,G200,
-    # G201 replaced by LOG400 in ruff
-    G201,
+    G100,G101,G200
     # these ignores are from flake8-simplify. please fix or ignore with commented reason
     SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
     # SIM104 is already covered by pyupgrade ruff
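Restoring `G100,G101,G200` also takes `G201` back out of the flake8 ignore list, so flake8 resumes enforcing it. G201 is flake8-logging-format's spelling of the same idea: it flags `.error(..., exc_info=True)` where `.exception(...)` would do. A hedged illustration (not code from this diff):

```python
import logging

log = logging.getLogger(__name__)

try:
    1 / 0
except ZeroDivisionError:
    # Flagged by G201: exc_info=True duplicates what .exception() does.
    log.error("division failed", exc_info=True)
    # Equivalent, preferred form:
    log.exception("division failed")
```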

@@ -623,9 +623,9 @@ def main() -> None:
             is_canary,
         )
-    except Exception:
-        log.exception(
-            "Failed to get issue. Defaulting to Meta runners and no experiments."
+    except Exception as e:
+        log.error(
+            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
         )
     set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)

@@ -1700,8 +1700,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
                     f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
                 )
             )
-        except Exception:
-            log.exception("Failed to save memory snapshot")
+        except Exception as e:
+            log.error("Failed to save memory snapshot, %s", e)
         torch.cuda.memory._record_memory_history(enabled=None)
@@ -2742,7 +2742,7 @@ class BenchmarkRunner:
             try:
                 shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
             except OSError:
-                log.exception("Could not find repro script for model %s", name)
+                log.error("Could not find repro script for model %s", name)
             else:
                 log.info(
                     "Repro script for model %s with minified graph saved to %s",

@@ -197,7 +197,6 @@ select = [
     "TC",
     "TRY002", # ban vanilla raise (todo fix NOQAs)
     "TRY203",
-    "TRY400", # use logging.exception
     "TRY401", # verbose-log-message
     "UP",
     "YTT",

@@ -47,15 +47,11 @@ def requirements_installed() -> bool:
         return True
     except ImportError:
-        logger.error(  # noqa: TRY400
-            "Requirements not installed, run the following command to install:",
-            exc_info=False,
+        logger.error(
+            "Requirements not installed, run the following command to install:"
         )
-        logger.error(  # noqa: TRY400
-            " > %s -m pip install -r %s/requirements.txt",
-            sys.executable,
-            ROOT_PATH,
-            exc_info=False,
+        logger.error(
+            " > %s -m pip install -r %s/requirements.txt", sys.executable, ROOT_PATH
         )
         return False
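One detail worth noting above: the deleted calls passed `exc_info=False` explicitly. For `logger.error` that is already the default behavior (a traceback is attached only when `exc_info` is truthy), so the restored plain calls log identically; the flag presumably just documented that suppressing the `ImportError` traceback was intentional. A quick check of the equivalence:

```python
import logging

log = logging.getLogger(__name__)

try:
    import nonexistent_package  # noqa: F401
except ImportError:
    # These two calls produce the same output: exc_info defaults to falsy.
    log.error("Requirements not installed", exc_info=False)
    log.error("Requirements not installed")
```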

@@ -138,7 +138,7 @@ def wrap_compiler_debug(
                 example_inputs,
                 compiler_name,
             )
-            log.exception("CompilerError")
+            log.error("CompilerError")
             raise
         # We may run regular PyTorch compute that may trigger Dynamo, do NOT

@@ -2148,7 +2148,7 @@ def torchscript(model, example_inputs, verbose=False):
         if verbose:
             log.exception("jit error")
         else:
-            log.error("Both torch.jit.trace and torch.jit.script failed")  # noqa: TRY400
+            log.error("Both torch.jit.trace and torch.jit.script failed")
     return None

@@ -359,7 +359,7 @@ class Guard:
         except Exception:
             log.exception("Error while creating guard:\n%s", str(self).rstrip())
             if self.stack:
-                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())  # noqa: TRY400
+                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
             raise

     def is_specialized_nn_module(self):

@@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
             major, minor = torch.cuda.get_device_capability(0)
             return str(major * 10 + minor)
         return str(cuda_arch)
-    except Exception:
-        log.exception("Error getting cuda arch")
+    except Exception as e:
+        log.error("Error getting cuda arch: %s", e)
         return None
@@ -35,8 +35,8 @@ def get_cuda_version() -> Optional[str]:
         if cuda_version is None:
             cuda_version = torch.version.cuda
         return cuda_version
-    except Exception:
-        log.exception("Error getting cuda version")
+    except Exception as e:
+        log.error("Error getting cuda version: %s", e)
         return None

@@ -181,7 +181,7 @@ def _fx_compile_mode_default() -> tuple[FxCompileMode, bool]:
         import logging
         log = logging.getLogger(__name__)
-        log.error(  # noqa: TRY400
+        log.error(
             "Invalid value of %s for %s. Expected one of %s. Using default.",
             value,
             name,

@@ -796,13 +796,13 @@ def create_node_mapping(
     except Exception as e:
         # Since this is just logging code, it should never interfere with regular
         # program execution, so we use this try-except to guard against any error
-        log.error("Unexpected error in create_node_mapping: %s", e)  # noqa: TRY400
-        log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)  # noqa: TRY400
-        log.error(  # noqa: TRY400
+        log.error("Unexpected error in create_node_mapping: %s", e)
+        log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)
+        log.error(
             "triton_kernel_to_post_grad_json: %s", triton_kernel_to_post_grad_json
         )
-        log.error("pre_grad_graph_id: %s", pre_grad_graph_id)  # noqa: TRY400
-        log.error(traceback.format_exc())  # noqa: TRY400
+        log.error("pre_grad_graph_id: %s", pre_grad_graph_id)
+        log.error(traceback.format_exc())
         return empty_return
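The restored `log.error(traceback.format_exc())` above is the manual spelling of what `log.exception` does in one call; both capture the in-flight exception's stack. A compact comparison (illustrative):

```python
import logging
import traceback

log = logging.getLogger(__name__)

try:
    {}["missing"]
except KeyError:
    # Manual: format the current traceback into the message yourself.
    log.error("lookup failed:\n%s", traceback.format_exc())
    # Same intent, one call:
    log.exception("lookup failed")
```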

@@ -718,7 +718,7 @@ class CompiledFxGraph(OutputCode):
             )
             self.compiled_fn_runner = getattr(code_cache, "runner", None)
         except OSError:
-            log.exception("Failed to load artifact: %s", artifact_path)
+            log.error("Failed to load artifact: %s", artifact_path)
             raise
         return artifact_path

@@ -2238,9 +2238,9 @@ class AlgorithmSelectorCache(PersistentCache):
             try:
                 timing = cls.benchmark_choice(choice, autotune_args)
             except CUDACompileError as e:
-                log.error(  # noqa: TRY400
+                log.error(
                     "CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
-                    e,
+                    str(e),
                 )
                 timing = float("inf")
             except NotImplementedError as e:
@@ -2253,7 +2253,7 @@ class AlgorithmSelectorCache(PersistentCache):
             else:
                 if "illegal memory access" in msg:
                     msg += "\n\nEither error in template or triton bug.\n"
-                log.error(  # noqa: TRY400
+                log.error(
                     "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
                     msg,
                 )
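A small aside on the `e,` to `str(e),` change above: with %-style logging, `%s` applies `str()` to its argument anyway, so both spellings render the same message; the revert simply restores the file's older form.

```python
import logging

log = logging.getLogger(__name__)
e = RuntimeError("nvcc failed")

# These two lines produce identical output: %s calls str() on the argument.
log.error("CUDA compilation error during autotuning: \n%s.", e)
log.error("CUDA compilation error during autotuning: \n%s.", str(e))
```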

@@ -60,7 +60,7 @@ def _orthogonalize_gram_schmidt(matrices, epsilon=0):
             try:
                 col /= torch.norm(col, dim=1, keepdim=True)
             except ZeroDivisionError:
-                logger.exception(
+                logger.error(
                     "The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
                     "as `orthogonalization_epsilon` in PowerSGD state."
                 )

@@ -235,7 +235,9 @@ class _AsyncCheckpointProcess:
                 f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}"  # noqa: G004
             )
         except BaseException as e:
-            logger.exception("Checkpoint background process encountered an exception")
+            logger.error(
+                f"Checkpoint background process encountered an exception: {e}"  # noqa: G004
+            )
             parent_conn.send(e)
             raise
         finally:
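The restored call above needs `# noqa: G004` because it interpolates the exception with an f-string: G004 exists since f-strings are formatted eagerly, even when the record is filtered out, while %-style arguments are formatted only if the record is actually emitted. Roughly:

```python
import logging

log = logging.getLogger(__name__)
err = RuntimeError("boom")

# G004-flagged: the f-string is built even if no handler emits the record.
log.error(f"Checkpoint process failed: {err}")  # noqa: G004

# Deferred: formatting happens only when the record is handled.
log.error("Checkpoint process failed: %s", err)
```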

@@ -90,7 +90,7 @@ def _dcp_method_logger(
                 msg_dict["event"] = "exception"
                 msg_dict["error"] = f"{error}"
                 msg_dict["time"] = time.time_ns()
-                _dcp_logger.error(msg_dict)  # noqa: TRY400
+                _dcp_logger.error(msg_dict)
                 raise
         # end event

@@ -141,7 +141,7 @@ class TailLog:
             try:
                 f.result()
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "error in log tailor for %s%s. %s: %s",
                     self._name,
                     local_rank,

@@ -1419,7 +1419,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                 # do the communication
                 _wait_batch_p2p(_batch_p2p(ops))
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "[Rank %s] pipeline schedule %s caught the following exception \
                      at time_step %s when running action %s",
                     self.rank,
@@ -1427,7 +1427,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                     time_step,
                     action,
                 )
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "%s",
                     _format_pipeline_order(
                         self.pipeline_order, error_step_number=time_step
@@ -1739,7 +1739,7 @@ class _PipelineScheduleRuntime(PipelineScheduleMulti):
                 else:
                     raise ValueError(f"{action=} is unknown or unsupported")
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "_PipelineScheduleRuntime caught exception at step %s when running action %s. Full Schedule:",
                     time_step,
                     action,

@@ -31,7 +31,7 @@ def _group_membership_management(store, name, is_join):
         try:
             store.wait([returned])
         except RuntimeError:
-            logger.error(  # noqa: TRY400
+            logger.error(
                 "Group membership token %s timed out waiting for %s to be released.",
                 my_token,
                 returned,

@@ -297,7 +297,7 @@ def _barrier(worker_names):
     try:
         _all_gather(None, set(worker_names))
     except RuntimeError as ex:
-        logger.error("Failed to complete barrier, got error %s", ex)  # noqa: TRY400
+        logger.error("Failed to complete barrier, got error %s", ex)

 @_require_initialized
@@ -312,7 +312,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
     try:
         _all_gather(None, timeout=timeout)
     except RuntimeError as ex:
-        logger.error(  # noqa: TRY400
+        logger.error(
             "Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
         )
         raise ex

@@ -1135,7 +1135,7 @@ def _log_export_wrapper(fn):
             error_type = t.__module__ + "." + t.__qualname__
             case_name = get_class_if_classified_error(e)
             if case_name is not None:
-                log.error(exportdb_error_message(case_name))  # noqa: TRY400
+                log.error(exportdb_error_message(case_name))
                 log_export_usage(
                     event="export.error.classified",
                     type=error_type,

@@ -312,7 +312,7 @@ def record_shapeenv_event(
                 if not shape_env.should_record_events or shape_env.is_recording:
                     # If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
                     raise
-                log.error(  # noqa: G201, TRY400
+                log.error(  # noqa: G201
                     "failed while running %s(*%s, **%s)",
                     name,
                     args[1:],
@@ -349,7 +349,7 @@ def replay_shape_env_events(events):
             # change after each event is replayed.
             event.run(shape_env)
         except Exception:
-            log.error("failed when running event: %s", event)  # noqa: TRY400
+            log.error("failed when running event: %s", event)
             raise
     return shape_env

@@ -756,7 +756,7 @@ class MultiProcessTestCase(TestCase):
                 )
                 sys.exit(TEST_SKIPS["generic"].exit_code)
             except Exception:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Caught exception: \n%s exiting " "process %s with exit code: %s",
                     traceback.format_exc(),
                     self.rank,
@@ -791,7 +791,7 @@ class MultiProcessTestCase(TestCase):
                 pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
                 pipes.append((i, pipe))
             except ConnectionError as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Encountered error while trying to get traceback for process %s: %s",
                     i,
                     e,
@@ -818,7 +818,7 @@ class MultiProcessTestCase(TestCase):
                     "Could not retrieve traceback for timed out process: %s", rank
                 )
             except ConnectionError as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Encountered error while trying to get traceback for process %s: %s",
                     rank,
                     e,