From 3443627e078deb813ae37f7182d41a802bd05ac4 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot
Date: Fri, 16 May 2025 08:29:26 +0000
Subject: [PATCH] Revert "[BE]: Enable RUFF TRY400 rule - log.exception (#153473)"

This reverts commit 4f4ecc583e0f48ad2d062a53bf91c61ab40b4948.

Reverted https://github.com/pytorch/pytorch/pull/153473 on behalf of https://github.com/jeanschmidt due to seems to have broken internal signals, @albanD may I count on you to help the author merge his PR? D74837988 ([comment](https://github.com/pytorch/pytorch/pull/153473#issuecomment-2886017075))
---
 .flake8                                              |  4 +---
 .github/scripts/runner_determinator.py               |  6 +++---
 benchmarks/dynamo/common.py                          |  6 +++---
 pyproject.toml                                       |  1 -
 tools/packaging/split_wheel.py                       | 12 ++++--------
 torch/_dynamo/repro/after_aot.py                     |  2 +-
 torch/_dynamo/utils.py                               |  2 +-
 torch/_guards.py                                     |  2 +-
 torch/_inductor/codegen/cuda/cuda_env.py             |  8 ++++----
 torch/_inductor/compile_fx.py                        |  2 +-
 torch/_inductor/debug.py                             | 10 +++++-----
 torch/_inductor/output_code.py                       |  2 +-
 torch/_inductor/select_algorithm.py                  |  6 +++---
 .../algorithms/ddp_comm_hooks/powerSGD_hook.py       |  2 +-
 .../checkpoint/_async_process_executor.py            |  4 +++-
 torch/distributed/checkpoint/logger.py               |  2 +-
 .../distributed/elastic/multiprocessing/tail_log.py  |  2 +-
 torch/distributed/pipelining/schedules.py            |  6 +++---
 torch/distributed/rpc/_utils.py                      |  2 +-
 torch/distributed/rpc/api.py                         |  4 ++--
 torch/export/_trace.py                               |  2 +-
 torch/fx/experimental/recording.py                   |  4 ++--
 torch/testing/_internal/common_distributed.py        |  6 +++---
 23 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/.flake8 b/.flake8
index 042bfbe00a3f..2d854aa6b258 100644
--- a/.flake8
+++ b/.flake8
@@ -16,9 +16,7 @@ ignore =
     # these ignores are from flake8-comprehensions; please fix!
     C407,
     # these ignores are from flake8-logging-format; please fix!
-    G100,G101,G200,
-    # G201 replaced by LOG400 in ruff
-    G201,
+    G100,G101,G200
     # these ignores are from flake8-simplify. please fix or ignore with commented reason
     SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
     # SIM104 is already covered by pyupgrade ruff
diff --git a/.github/scripts/runner_determinator.py b/.github/scripts/runner_determinator.py
index 5617f7bfa240..1481459d40c4 100644
--- a/.github/scripts/runner_determinator.py
+++ b/.github/scripts/runner_determinator.py
@@ -623,9 +623,9 @@ def main() -> None:
             is_canary,
         )

-    except Exception:
-        log.exception(
-            "Failed to get issue. Defaulting to Meta runners and no experiments."
+    except Exception as e:
+        log.error(
+            f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
         )

     set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index b2fa8d6f0d5d..2273d2570ecf 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -1700,8 +1700,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
                     f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
                 )
             )
-        except Exception:
-            log.exception("Failed to save memory snapshot")
+        except Exception as e:
+            log.error("Failed to save memory snapshot, %s", e)

         torch.cuda.memory._record_memory_history(enabled=None)

@@ -2742,7 +2742,7 @@ class BenchmarkRunner:
             try:
                 shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
             except OSError:
-                log.exception("Could not find repro script for model %s", name)
+                log.error("Could not find repro script for model %s", name)
             else:
                 log.info(
                     "Repro script for model %s with minified graph saved to %s",
diff --git a/pyproject.toml b/pyproject.toml
index e045eecf3c0e..7efbf270a50f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -197,7 +197,6 @@ select = [
     "TC",
     "TRY002", # ban vanilla raise (todo fix NOQAs)
     "TRY203",
-    "TRY400", # use logging.exception
     "TRY401", # verbose-log-message
     "UP",
     "YTT",
diff --git a/tools/packaging/split_wheel.py b/tools/packaging/split_wheel.py
index c6cf50e984fe..1aa77aa5c694 100644
--- a/tools/packaging/split_wheel.py
+++ b/tools/packaging/split_wheel.py
@@ -47,15 +47,11 @@ def requirements_installed() -> bool:

         return True
     except ImportError:
-        logger.error(  # noqa: TRY400
-            "Requirements not installed, run the following command to install:",
-            exc_info=False,
+        logger.error(
+            "Requirements not installed, run the following command to install:"
         )
-        logger.error(  # noqa: TRY400
-            " > %s -m pip install -r %s/requirements.txt",
-            sys.executable,
-            ROOT_PATH,
-            exc_info=False,
+        logger.error(
+            " > %s -m pip install -r %s/requirements.txt", sys.executable, ROOT_PATH
         )
         return False

diff --git a/torch/_dynamo/repro/after_aot.py b/torch/_dynamo/repro/after_aot.py
index 55ca964ec5c2..00e412c7cc58 100644
--- a/torch/_dynamo/repro/after_aot.py
+++ b/torch/_dynamo/repro/after_aot.py
@@ -138,7 +138,7 @@ def wrap_compiler_debug(
                         example_inputs,
                         compiler_name,
                     )
-                    log.exception("CompilerError")
+                    log.error("CompilerError")
                     raise

             # We may run regular PyTorch compute that may trigger Dynamo, do NOT
diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py
index 335f4272c172..d6e758b76fbd 100644
--- a/torch/_dynamo/utils.py
+++ b/torch/_dynamo/utils.py
@@ -2148,7 +2148,7 @@ def torchscript(model, example_inputs, verbose=False):
             if verbose:
                 log.exception("jit error")
             else:
-                log.error("Both torch.jit.trace and torch.jit.script failed")  # noqa: TRY400
+                log.error("Both torch.jit.trace and torch.jit.script failed")
     return None

diff --git a/torch/_guards.py b/torch/_guards.py
index 33c3b958d53c..8a763ff68e33 100644
--- a/torch/_guards.py
+++ b/torch/_guards.py
@@ -359,7 +359,7 @@ class Guard:
         except Exception:
             log.exception("Error while creating guard:\n%s", str(self).rstrip())
             if self.stack:
-                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())  # noqa: TRY400
+                log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
             raise

     def is_specialized_nn_module(self):
diff --git a/torch/_inductor/codegen/cuda/cuda_env.py b/torch/_inductor/codegen/cuda/cuda_env.py
index c4d8ec40b087..95be434e03b7 100644
--- a/torch/_inductor/codegen/cuda/cuda_env.py
+++ b/torch/_inductor/codegen/cuda/cuda_env.py
@@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
             major, minor = torch.cuda.get_device_capability(0)
             return str(major * 10 + minor)
         return str(cuda_arch)
-    except Exception:
-        log.exception("Error getting cuda arch")
+    except Exception as e:
+        log.error("Error getting cuda arch: %s", e)
         return None


@@ -35,8 +35,8 @@ def get_cuda_version() -> Optional[str]:
         if cuda_version is None:
             cuda_version = torch.version.cuda
         return cuda_version
-    except Exception:
-        log.exception("Error getting cuda version")
+    except Exception as e:
+        log.error("Error getting cuda version: %s", e)
         return None


diff --git a/torch/_inductor/compile_fx.py b/torch/_inductor/compile_fx.py
index 941b02d45a9e..784d6e80311b 100644
--- a/torch/_inductor/compile_fx.py
+++ b/torch/_inductor/compile_fx.py
@@ -181,7 +181,7 @@ def _fx_compile_mode_default() -> tuple[FxCompileMode, bool]:
        import logging

        log = logging.getLogger(__name__)
-        log.error(  # noqa: TRY400
+        log.error(
            "Invalid value of %s for %s. Expected one of %s. Using default.",
            value,
            name,
diff --git a/torch/_inductor/debug.py b/torch/_inductor/debug.py
index a509329fe1fc..140d99d4b7d0 100644
--- a/torch/_inductor/debug.py
+++ b/torch/_inductor/debug.py
@@ -796,13 +796,13 @@ def create_node_mapping(
     except Exception as e:
         # Since this is just logging code, it should never interfere with regular
         # program execution, so we use this try-except to guard against any error
-        log.error("Unexpected error in create_node_mapping: %s", e)  # noqa: TRY400
-        log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)  # noqa: TRY400
-        log.error(  # noqa: TRY400
+        log.error("Unexpected error in create_node_mapping: %s", e)
+        log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)
+        log.error(
             "triton_kernel_to_post_grad_json: %s", triton_kernel_to_post_grad_json
         )
-        log.error("pre_grad_graph_id: %s", pre_grad_graph_id)  # noqa: TRY400
-        log.error(traceback.format_exc())  # noqa: TRY400
+        log.error("pre_grad_graph_id: %s", pre_grad_graph_id)
+        log.error(traceback.format_exc())
         return empty_return

diff --git a/torch/_inductor/output_code.py b/torch/_inductor/output_code.py
index 10214143ae95..2218f2c0dbe0 100644
--- a/torch/_inductor/output_code.py
+++ b/torch/_inductor/output_code.py
@@ -718,7 +718,7 @@ class CompiledFxGraph(OutputCode):
                 )
                 self.compiled_fn_runner = getattr(code_cache, "runner", None)
             except OSError:
-                log.exception("Failed to load artifact: %s", artifact_path)
+                log.error("Failed to load artifact: %s", artifact_path)
                 raise

         return artifact_path
diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py
index c1f1ab25d564..9916d82b69f5 100644
--- a/torch/_inductor/select_algorithm.py
+++ b/torch/_inductor/select_algorithm.py
@@ -2238,9 +2238,9 @@ class AlgorithmSelectorCache(PersistentCache):
             try:
                 timing = cls.benchmark_choice(choice, autotune_args)
             except CUDACompileError as e:
-                log.error(  # noqa: TRY400
+                log.error(
                     "CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
-                    e,
+                    str(e),
                 )
                 timing = float("inf")
             except NotImplementedError as e:
@@ -2253,7 +2253,7 @@ class AlgorithmSelectorCache(PersistentCache):
             else:
                 if "illegal memory access" in msg:
                     msg += "\n\nEither error in template or triton bug.\n"
-                log.error(  # noqa: TRY400
+                log.error(
                     "Runtime error during autotuning: \n%s. \nIgnoring this choice.",
                     msg,
                 )
diff --git a/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py b/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
index f2cfce78c51b..00b84d6c28ee 100644
--- a/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
+++ b/torch/distributed/algorithms/ddp_comm_hooks/powerSGD_hook.py
@@ -60,7 +60,7 @@ def _orthogonalize_gram_schmidt(matrices, epsilon=0):
             try:
                 col /= torch.norm(col, dim=1, keepdim=True)
             except ZeroDivisionError:
-                logger.exception(
+                logger.error(
                     "The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
                     "as `orthogonalization_epsilon` in PowerSGD state."
                 )
diff --git a/torch/distributed/checkpoint/_async_process_executor.py b/torch/distributed/checkpoint/_async_process_executor.py
index ba7c2c0683bc..513d71f427de 100644
--- a/torch/distributed/checkpoint/_async_process_executor.py
+++ b/torch/distributed/checkpoint/_async_process_executor.py
@@ -235,7 +235,9 @@ class _AsyncCheckpointProcess:
                 f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}"  # noqa: G004
             )
         except BaseException as e:
-            logger.exception("Checkpoint background process encountered an exception")
+            logger.error(
+                f"Checkpoint background process encountered an exception: {e}"  # noqa: G004
+            )
             parent_conn.send(e)
             raise
         finally:
diff --git a/torch/distributed/checkpoint/logger.py b/torch/distributed/checkpoint/logger.py
index eeca6f79c431..a8961493cbee 100644
--- a/torch/distributed/checkpoint/logger.py
+++ b/torch/distributed/checkpoint/logger.py
@@ -90,7 +90,7 @@ def _dcp_method_logger(
                 msg_dict["event"] = "exception"
                 msg_dict["error"] = f"{error}"
                 msg_dict["time"] = time.time_ns()
-                _dcp_logger.error(msg_dict)  # noqa: TRY400
+                _dcp_logger.error(msg_dict)
                 raise

             # end event
diff --git a/torch/distributed/elastic/multiprocessing/tail_log.py b/torch/distributed/elastic/multiprocessing/tail_log.py
index 88da3409be48..034072109b7f 100644
--- a/torch/distributed/elastic/multiprocessing/tail_log.py
+++ b/torch/distributed/elastic/multiprocessing/tail_log.py
@@ -141,7 +141,7 @@ class TailLog:
             try:
                 f.result()
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "error in log tailor for %s%s. %s: %s",
                     self._name,
                     local_rank,
diff --git a/torch/distributed/pipelining/schedules.py b/torch/distributed/pipelining/schedules.py
index 243e4ee35365..c3b316577744 100644
--- a/torch/distributed/pipelining/schedules.py
+++ b/torch/distributed/pipelining/schedules.py
@@ -1419,7 +1419,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                 # do the communication
                 _wait_batch_p2p(_batch_p2p(ops))
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "[Rank %s] pipeline schedule %s caught the following exception \
                      at time_step %s when running action %s",
                     self.rank,
@@ -1427,7 +1427,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                     time_step,
                     action,
                 )
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "%s",
                     _format_pipeline_order(
                         self.pipeline_order, error_step_number=time_step
@@ -1739,7 +1739,7 @@ class _PipelineScheduleRuntime(PipelineScheduleMulti):
                 else:
                     raise ValueError(f"{action=} is unknown or unsupported")
             except Exception as e:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "_PipelineScheduleRuntime caught exception at step %s when running action %s. Full Schedule:",
                     time_step,
                     action,
diff --git a/torch/distributed/rpc/_utils.py b/torch/distributed/rpc/_utils.py
index a5ec160047f7..8925bc662b5f 100644
--- a/torch/distributed/rpc/_utils.py
+++ b/torch/distributed/rpc/_utils.py
@@ -31,7 +31,7 @@ def _group_membership_management(store, name, is_join):
             try:
                 store.wait([returned])
             except RuntimeError:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Group membership token %s timed out waiting for %s to be released.",
                     my_token,
                     returned,
diff --git a/torch/distributed/rpc/api.py b/torch/distributed/rpc/api.py
index cb6b3b0d5e19..d4a6712e0d66 100644
--- a/torch/distributed/rpc/api.py
+++ b/torch/distributed/rpc/api.py
@@ -297,7 +297,7 @@ def _barrier(worker_names):
     try:
         _all_gather(None, set(worker_names))
     except RuntimeError as ex:
-        logger.error("Failed to complete barrier, got error %s", ex)  # noqa: TRY400
+        logger.error("Failed to complete barrier, got error %s", ex)


 @_require_initialized
@@ -312,7 +312,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
     try:
         _all_gather(None, timeout=timeout)
     except RuntimeError as ex:
-        logger.error(  # noqa: TRY400
+        logger.error(
             "Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
         )
         raise ex
diff --git a/torch/export/_trace.py b/torch/export/_trace.py
index 6554bf9172d0..b07c41b2ea99 100644
--- a/torch/export/_trace.py
+++ b/torch/export/_trace.py
@@ -1135,7 +1135,7 @@ def _log_export_wrapper(fn):
             error_type = t.__module__ + "." + t.__qualname__
             case_name = get_class_if_classified_error(e)
             if case_name is not None:
-                log.error(exportdb_error_message(case_name))  # noqa: TRY400
+                log.error(exportdb_error_message(case_name))
                 log_export_usage(
                     event="export.error.classified",
                     type=error_type,
diff --git a/torch/fx/experimental/recording.py b/torch/fx/experimental/recording.py
index 8814ec0a5f05..bb54eba11384 100644
--- a/torch/fx/experimental/recording.py
+++ b/torch/fx/experimental/recording.py
@@ -312,7 +312,7 @@ def record_shapeenv_event(
                 if not shape_env.should_record_events or shape_env.is_recording:
                     # If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
                     raise
-                log.error(  # noqa: G201, TRY400
+                log.error(  # noqa: G201
                     "failed while running %s(*%s, **%s)",
                     name,
                     args[1:],
@@ -349,7 +349,7 @@ def replay_shape_env_events(events):
             # change after each event is replayed.
             event.run(shape_env)
         except Exception:
-            log.error("failed when running event: %s", event)  # noqa: TRY400
+            log.error("failed when running event: %s", event)
             raise

     return shape_env
diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py
index 28c186486a7f..7a7c0c6a1d01 100644
--- a/torch/testing/_internal/common_distributed.py
+++ b/torch/testing/_internal/common_distributed.py
@@ -756,7 +756,7 @@ class MultiProcessTestCase(TestCase):
                 )
                 sys.exit(TEST_SKIPS["generic"].exit_code)
             except Exception:
-                logger.error(  # noqa: TRY400
+                logger.error(
                     "Caught exception: \n%s exiting " "process %s with exit code: %s",
                     traceback.format_exc(),
                     self.rank,
@@ -791,7 +791,7 @@ class MultiProcessTestCase(TestCase):
                     pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
                     pipes.append((i, pipe))
                 except ConnectionError as e:
-                    logger.error(  # noqa: TRY400
+                    logger.error(
                         "Encountered error while trying to get traceback for process %s: %s",
                         i,
                         e,
@@ -818,7 +818,7 @@ class MultiProcessTestCase(TestCase):
                         "Could not retrieve traceback for timed out process: %s", rank
                     )
                 except ConnectionError as e:
-                    logger.error(  # noqa: TRY400
+                    logger.error(
                         "Encountered error while trying to get traceback for process %s: %s",
                         rank,
                         e,
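
Note (not part of the patch): ruff's TRY400 rule, which this revert disables, flags `logging.error(...)` calls inside `except` blocks and suggests `logging.exception(...)`, which logs at ERROR level and attaches the active traceback automatically. A minimal sketch of the difference; the `load_config` helper and its argument are made up for illustration:

import logging

log = logging.getLogger(__name__)


def load_config(path):
    # Hypothetical helper, only to contrast the two logging styles.
    try:
        with open(path) as f:
            return f.read()
    except OSError as e:
        # Style this revert restores: message only, traceback dropped unless exc_info is passed.
        log.error("Failed to load config %s: %s", path, e)
        # Style TRY400 prefers: same ERROR level, traceback attached automatically.
        log.exception("Failed to load config %s", path)
        raise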