From 3443627e078deb813ae37f7182d41a802bd05ac4 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot
Date: Fri, 16 May 2025 08:29:26 +0000
Subject: [PATCH] Revert "[BE]: Enable RUFF TRY400 rule - log.exception (#153473)"

This reverts commit 4f4ecc583e0f48ad2d062a53bf91c61ab40b4948.

Reverted https://github.com/pytorch/pytorch/pull/153473 on behalf of https://github.com/jeanschmidt due to seems to have broken internal signals, @albanD may I count on you to help the author merge his PR? D74837988 ([comment](https://github.com/pytorch/pytorch/pull/153473#issuecomment-2886017075))
---
 .flake8                                              |  4 +---
 .github/scripts/runner_determinator.py               |  6 +++---
 benchmarks/dynamo/common.py                          |  6 +++---
 pyproject.toml                                       |  1 -
 tools/packaging/split_wheel.py                       | 12 ++++--------
 torch/_dynamo/repro/after_aot.py                     |  2 +-
 torch/_dynamo/utils.py                               |  2 +-
 torch/_guards.py                                     |  2 +-
 torch/_inductor/codegen/cuda/cuda_env.py             |  8 ++++----
 torch/_inductor/compile_fx.py                        |  2 +-
 torch/_inductor/debug.py                             | 10 +++++-----
 torch/_inductor/output_code.py                       |  2 +-
 torch/_inductor/select_algorithm.py                  |  6 +++---
 .../algorithms/ddp_comm_hooks/powerSGD_hook.py       |  2 +-
 .../checkpoint/_async_process_executor.py            |  4 +++-
 torch/distributed/checkpoint/logger.py               |  2 +-
 .../distributed/elastic/multiprocessing/tail_log.py  |  2 +-
 torch/distributed/pipelining/schedules.py            |  6 +++---
 torch/distributed/rpc/_utils.py                      |  2 +-
 torch/distributed/rpc/api.py                         |  4 ++--
 torch/export/_trace.py                               |  2 +-
 torch/fx/experimental/recording.py                   |  4 ++--
 torch/testing/_internal/common_distributed.py        |  6 +++---
 23 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/.flake8 b/.flake8
index 042bfbe00a3f..2d854aa6b258 100644
--- a/.flake8
+++ b/.flake8
@@ -16,9 +16,7 @@ ignore =
     # these ignores are from flake8-comprehensions; please fix!
     C407,
     # these ignores are from flake8-logging-format; please fix!
-    G100,G101,G200,
-    # G201 replaced by LOG400 in ruff
-    G201,
+    G100,G101,G200
     # these ignores are from flake8-simplify. please fix or ignore with commented reason
     SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
     # SIM104 is already covered by pyupgrade ruff
diff --git a/.github/scripts/runner_determinator.py b/.github/scripts/runner_determinator.py
index 5617f7bfa240..1481459d40c4 100644
--- a/.github/scripts/runner_determinator.py
+++ b/.github/scripts/runner_determinator.py
@@ -623,9 +623,9 @@ def main() -> None:
             is_canary,
         )

-    except Exception:
-        log.exception(
-            "Failed to get issue. Defaulting to Meta runners and no experiments."
+    except Exception as e:
+        log.error(
+            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
         )

     set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index b2fa8d6f0d5d..2273d2570ecf 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -1700,8 +1700,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
                     f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
                 )
             )
-        except Exception:
-            log.exception("Failed to save memory snapshot")
+        except Exception as e:
+            log.error("Failed to save memory snapshot, %s", e)

         torch.cuda.memory._record_memory_history(enabled=None)

@@ -2742,7 +2742,7 @@ class BenchmarkRunner:
             try:
                 shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
             except OSError:
-                log.exception("Could not find repro script for model %s", name)
+                log.error("Could not find repro script for model %s", name)
             else:
                 log.info(
                     "Repro script for model %s with minified graph saved to %s",
diff --git a/pyproject.toml b/pyproject.toml
index e045eecf3c0e..7efbf270a50f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -197,7 +197,6 @@ select = [
     "TC",
     "TRY002", # ban vanilla raise (todo fix NOQAs)
     "TRY203",
-    "TRY400", # use logging.exception
     "TRY401", # verbose-log-message
     "UP",
     "YTT",
diff --git a/tools/packaging/split_wheel.py b/tools/packaging/split_wheel.py
index c6cf50e984fe..1aa77aa5c694 100644
--- a/tools/packaging/split_wheel.py
+++ b/tools/packaging/split_wheel.py
@@ -47,15 +47,11 @@ def requirements_installed() -> bool:

         return True
     except ImportError:
-        logger.error(  # noqa: TRY400
-            "Requirements not installed, run the following command to install:",
-            exc_info=False,
+        logger.error(
+            "Requirements not installed, run the following command to install:"
         )
-        logger.error(  # noqa: TRY400
-            " > %s -m pip install -r %s/requirements.txt",
-            sys.executable,
-            ROOT_PATH,
-            exc_info=False,
+        logger.error(
+            " > %s -m pip install -r %s/requirements.txt", sys.executable, ROOT_PATH
         )
         return False

diff --git a/torch/_dynamo/repro/after_aot.py b/torch/_dynamo/repro/after_aot.py
index 55ca964ec5c2..00e412c7cc58 100644
--- a/torch/_dynamo/repro/after_aot.py
+++ b/torch/_dynamo/repro/after_aot.py
@@ -138,7 +138,7 @@ def wrap_compiler_debug(
                         example_inputs,
                         compiler_name,
                     )
-                    log.exception("CompilerError")
+                    log.error("CompilerError")
                     raise

             # We may run regular PyTorch compute that may trigger Dynamo, do NOT
diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py
index 335f4272c172..d6e758b76fbd 100644
--- a/torch/_dynamo/utils.py
+++ b/torch/_dynamo/utils.py
@@ -2148,7 +2148,7 @@ def torchscript(model, example_inputs, verbose=False):
             if verbose:
                 log.exception("jit error")
             else:
-                log.error("Both torch.jit.trace and torch.jit.script failed")  # noqa: TRY400
+                log.error("Both torch.jit.trace and torch.jit.script failed")
     return None

diff --git a/torch/_guards.py b/torch/_guards.py
index 33c3b958d53c..8a763ff68e33 100644
--- a/torch/_guards.py
+++ b/torch/_guards.py
@@ -359,7 +359,7 @@ class Guard:
         except Exception:
             log.exception("Error while creating guard:\n%s", str(self).rstrip())
             if self.stack:
-                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())  # noqa: TRY400
+                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
             raise

     def is_specialized_nn_module(self):
diff --git a/torch/_inductor/codegen/cuda/cuda_env.py b/torch/_inductor/codegen/cuda/cuda_env.py
index c4d8ec40b087..95be434e03b7 100644
--- a/torch/_inductor/codegen/cuda/cuda_env.py
+++ b/torch/_inductor/codegen/cuda/cuda_env.py
@@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
             major, minor = torch.cuda.get_device_capability(0)
             return str(major * 10 + minor)
         return str(cuda_arch)
-    except Exception:
-        log.exception("Error getting cuda arch")
+    except Exception as e:
+        log.error("Error getting cuda arch: %s", e)
         return None


@@ -35,8 +35,8 @@ def get_cuda_version() -> Optional[str]:
         if cuda_version is None:
             cuda_version = torch.version.cuda
         return cuda_version
-    except Exception:
-        log.exception("Error getting cuda version")
+    except Exception as e:
+        log.error("Error getting cuda version: %s", e)
         return None


diff --git a/torch/_inductor/compile_fx.py b/torch/_inductor/compile_fx.py
index 941b02d45a9e..784d6e80311b 100644
--- a/torch/_inductor/compile_fx.py
+++ b/torch/_inductor/compile_fx.py
@@ -181,7 +181,7 @@ def _fx_compile_mode_default() -> tuple[FxCompileMode, bool]:
        import logging

        log = logging.getLogger(__name__)
-        log.error(  # noqa: TRY400
+        log.error(
            "Invalid value of %s for %s. Expected one of %s. Using default.",
            value,
            name,
diff --git a/torch/_inductor/debug.py b/torch/_inductor/debug.py
index a509329fe1fc..140d99d4b7d0 100644
--- a/torch/_inductor/debug.py
+++ b/torch/_inductor/debug.py
@@ -796,13 +796,13 @@ def create_node_mapping(
     except Exception as e:
         # Since this is just logging code, it should never interfere with regular
         # program execution, so we use this try-except to guard against any error
-        log.error("Unexpected error in create_node_mapping: %s", e)  # noqa: TRY400
-        log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)  # noqa: TRY400
-        log.error(  # noqa: TRY400
+        log.error("Unexpected error in create_node_mapping: %s", e)
+        log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)
+        log.error(
             "triton_kernel_to_post_grad_json: %s", triton_kernel_to_post_grad_json
         )
-        log.error("pre_grad_graph_id: %s", pre_grad_graph_id)  # noqa: TRY400
-        log.error(traceback.format_exc())  # noqa: TRY400
+        log.error("pre_grad_graph_id: %s", pre_grad_graph_id)
+        log.error(traceback.format_exc())
         return empty_return

diff --git a/torch/_inductor/output_code.py b/torch/_inductor/output_code.py
index 10214143ae95..2218f2c0dbe0 100644
--- a/torch/_inductor/output_code.py
+++ b/torch/_inductor/output_code.py
@@ -718,7 +718,7 @@ class CompiledFxGraph(OutputCode):
                 )
                 self.compiled_fn_runner = getattr(code_cache, "runner", None)
             except OSError:
-                log.exception("Failed to load artifact: %s", artifact_path)
+                log.error("Failed to load artifact: %s", artifact_path)
                 raise

         return artifact_path
diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py
index c1f1ab25d564..9916d82b69f5 100644
--- a/torch/_inductor/select_algorithm.py
+++ b/torch/_inductor/select_algorithm.py
@@ -2238,9 +2238,9 @@ class AlgorithmSelectorCache(PersistentCache):
             try:
                 timing = cls.benchmark_choice(choice, autotune_args)
             except CUDACompileError as e:
-                log.error(  # noqa: TRY400
+                log.error(
                     "CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
-                    e,
+                    str(e),
                 )
                 timing = float("inf")
             except NotImplementedError as e:
@@ -2253,7 +2253,7 @@ class AlgorithmSelectorCache(PersistentCache):
             else:
                 if "illegal memory access" in msg:
                     msg += "\n\nEither error in template or triton bug.\n"
-                log.error(  # noqa: TRY400
+                log.error(
                     "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
                     msg,
                 )
diff --git a/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py b/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
index f2cfce78c51b..00b84d6c28ee 100644
--- a/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
+++ b/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
@@ -60,7 +60,7 @@ def _orthogonalize_gram_schmidt(matrices, epsilon=0):
             try:
                 col /= torch.norm(col, dim=1, keepdim=True)
             except ZeroDivisionError:
-                logger.exception(
+                logger.error(
                     "The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
                     "as `orthogonalization_epsilon` in PowerSGD state."
                 )
diff --git a/torch/distributed/checkpoint/_async_process_executor.py b/torch/distributed/checkpoint/_async_process_executor.py
index ba7c2c0683bc..513d71f427de 100644
--- a/torch/distributed/checkpoint/_async_process_executor.py
+++ b/torch/distributed/checkpoint/_async_process_executor.py
@@ -235,7 +235,9 @@ class _AsyncCheckpointProcess:
                 f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}"  # noqa: G004
             )
         except BaseException as e:
-            logger.exception("Checkpoint background process encountered an exception")
+            logger.error(
+                f"Checkpoint background process encountered an exception: {e}"  # noqa: G004
+            )
             parent_conn.send(e)
             raise
         finally:
diff --git a/torch/distributed/checkpoint/logger.py b/torch/distributed/checkpoint/logger.py
index eeca6f79c431..a8961493cbee 100644
--- a/torch/distributed/checkpoint/logger.py
+++ b/torch/distributed/checkpoint/logger.py
@@ -90,7 +90,7 @@ def _dcp_method_logger(
                 msg_dict["event"] = "exception"
                 msg_dict["error"] = f"{error}"
                 msg_dict["time"] = time.time_ns()
-                _dcp_logger.error(msg_dict)  # noqa: TRY400
+                _dcp_logger.error(msg_dict)
                 raise

             # end event
diff --git a/torch/distributed/elastic/multiprocessing/tail_log.py b/torch/distributed/elastic/multiprocessing/tail_log.py
index 88da3409be48..034072109b7f 100644
--- a/torch/distributed/elastic/multiprocessing/tail_log.py
+++ b/torch/distributed/elastic/multiprocessing/tail_log.py
@@ -141,7 +141,7 @@ class TailLog:
             try:
                 f.result()
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "error in log tailor for %s%s. %s: %s",
                     self._name,
                     local_rank,
diff --git a/torch/distributed/pipelining/schedules.py b/torch/distributed/pipelining/schedules.py
index 243e4ee35365..c3b316577744 100644
--- a/torch/distributed/pipelining/schedules.py
+++ b/torch/distributed/pipelining/schedules.py
@@ -1419,7 +1419,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                 # do the communication
                 _wait_batch_p2p(_batch_p2p(ops))
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "[Rank %s] pipeline schedule %s caught the following exception \
                      at time_step %s when running action %s",
                     self.rank,
@@ -1427,7 +1427,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                     time_step,
                     action,
                 )
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "%s",
                     _format_pipeline_order(
                         self.pipeline_order, error_step_number=time_step
@@ -1739,7 +1739,7 @@ class _PipelineScheduleRuntime(PipelineScheduleMulti):
                 else:
                     raise ValueError(f"{action=} is unknown or unsupported")
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "_PipelineScheduleRuntime caught exception at step %s when running action %s. Full Schedule:",
                     time_step,
                     action,
diff --git a/torch/distributed/rpc/_utils.py b/torch/distributed/rpc/_utils.py
index a5ec160047f7..8925bc662b5f 100644
--- a/torch/distributed/rpc/_utils.py
+++ b/torch/distributed/rpc/_utils.py
@@ -31,7 +31,7 @@ def _group_membership_management(store, name, is_join):
             try:
                 store.wait([returned])
             except RuntimeError:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Group membership token %s timed out waiting for %s to be released.",
                     my_token,
                     returned,
diff --git a/torch/distributed/rpc/api.py b/torch/distributed/rpc/api.py
index cb6b3b0d5e19..d4a6712e0d66 100644
--- a/torch/distributed/rpc/api.py
+++ b/torch/distributed/rpc/api.py
@@ -297,7 +297,7 @@ def _barrier(worker_names):
     try:
         _all_gather(None, set(worker_names))
     except RuntimeError as ex:
-        logger.error("Failed to complete barrier, got error %s", ex)  # noqa: TRY400
+        logger.error("Failed to complete barrier, got error %s", ex)


 @_require_initialized
@@ -312,7 +312,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
     try:
         _all_gather(None, timeout=timeout)
     except RuntimeError as ex:
-        logger.error(  # noqa: TRY400
+        logger.error(
             "Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
         )
         raise ex
diff --git a/torch/export/_trace.py b/torch/export/_trace.py
index 6554bf9172d0..b07c41b2ea99 100644
--- a/torch/export/_trace.py
+++ b/torch/export/_trace.py
@@ -1135,7 +1135,7 @@ def _log_export_wrapper(fn):
             error_type = t.__module__ + "." + t.__qualname__
             case_name = get_class_if_classified_error(e)
             if case_name is not None:
-                log.error(exportdb_error_message(case_name))  # noqa: TRY400
+                log.error(exportdb_error_message(case_name))
                 log_export_usage(
                     event="export.error.classified",
                     type=error_type,
diff --git a/torch/fx/experimental/recording.py b/torch/fx/experimental/recording.py
index 8814ec0a5f05..bb54eba11384 100644
--- a/torch/fx/experimental/recording.py
+++ b/torch/fx/experimental/recording.py
@@ -312,7 +312,7 @@ def record_shapeenv_event(
                 if not shape_env.should_record_events or shape_env.is_recording:
                     # If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
                     raise
-                log.error(  # noqa: G201, TRY400
+                log.error(  # noqa: G201
                     "failed while running %s(*%s, **%s)",
                     name,
                     args[1:],
@@ -349,7 +349,7 @@ def replay_shape_env_events(events):
             # change after each event is replayed.
             event.run(shape_env)
         except Exception:
-            log.error("failed when running event: %s", event)  # noqa: TRY400
+            log.error("failed when running event: %s", event)
             raise

     return shape_env
diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py
index 28c186486a7f..7a7c0c6a1d01 100644
--- a/torch/testing/_internal/common_distributed.py
+++ b/torch/testing/_internal/common_distributed.py
@@ -756,7 +756,7 @@ class MultiProcessTestCase(TestCase):
                 )
                 sys.exit(TEST_SKIPS["generic"].exit_code)
             except Exception:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Caught exception: \n%s exiting " "process %s with exit code: %s",
                     traceback.format_exc(),
                     self.rank,
@@ -791,7 +791,7 @@ class MultiProcessTestCase(TestCase):
                     pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
                     pipes.append((i, pipe))
                 except ConnectionError as e:
-                    logger.error(  # noqa: TRY400
+                    logger.error(
                         "Encountered error while trying to get traceback for process %s: %s",
                         i,
                         e,
@@ -818,7 +818,7 @@ class MultiProcessTestCase(TestCase):
                         "Could not retrieve traceback for timed out process: %s", rank
                     )
                 except ConnectionError as e:
-                    logger.error(  # noqa: TRY400
+                    logger.error(
                         "Encountered error while trying to get traceback for process %s: %s",
                         rank,
                         e,
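
Note (not part of the patch): ruff's TRY400 rule, which this revert disables, flags `logging.error(...)` calls inside `except` blocks and suggests `logging.exception(...)`, which logs at ERROR level and attaches the active traceback automatically. A minimal sketch of the difference; the `load_config` helper and its argument are made up for illustration:

import logging

log = logging.getLogger(__name__)


def load_config(path):
    # Hypothetical helper, only to contrast the two logging styles.
    try:
        with open(path) as f:
            return f.read()
    except OSError as e:
        # Style this revert restores: message only, traceback dropped unless exc_info is passed.
        log.error("Failed to load config %s: %s", path, e)
        # Style TRY400 prefers: same ERROR level, traceback attached automatically.
        log.exception("Failed to load config %s", path)
        raise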