mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Revert "[BE]: Enable RUFF TRY400 rule - log.exception (#153473)"
This reverts commit 4f4ecc583e0f48ad2d062a53bf91c61ab40b4948. Reverted https://github.com/pytorch/pytorch/pull/153473 on behalf of https://github.com/jeanschmidt because it seems to have broken internal signals. @albanD, may I count on you to help the author merge his PR? D74837988 ([comment](https://github.com/pytorch/pytorch/pull/153473#issuecomment-2886017075))
This commit is contained in:
4
.flake8
4
.flake8
@ -16,9 +16,7 @@ ignore =
|
||||
# these ignores are from flake8-comprehensions; please fix!
|
||||
C407,
|
||||
# these ignores are from flake8-logging-format; please fix!
|
||||
G100,G101,G200,
|
||||
# G201 replaced by LOG400 in ruff
|
||||
G201,
|
||||
G100,G101,G200
|
||||
# these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
|
||||
# SIM104 is already covered by pyupgrade ruff
|
||||
|
6
.github/scripts/runner_determinator.py
vendored
6
.github/scripts/runner_determinator.py
vendored
@ -623,9 +623,9 @@ def main() -> None:
|
||||
is_canary,
|
||||
)
|
||||
|
||||
except Exception:
|
||||
log.exception(
|
||||
"Failed to get issue. Defaulting to Meta runners and no experiments."
|
||||
except Exception as e:
|
||||
log.error(
|
||||
f"Failed to get issue. Defaulting to Meta runners and no experiments. Exception: {e}"
|
||||
)
|
||||
|
||||
set_github_output(GH_OUTPUT_KEY_LABEL_TYPE, runner_label_prefix)
|
||||
|
@ -1700,8 +1700,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
|
||||
f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
log.exception("Failed to save memory snapshot")
|
||||
except Exception as e:
|
||||
log.error("Failed to save memory snapshot, %s", e)
|
||||
|
||||
torch.cuda.memory._record_memory_history(enabled=None)
|
||||
|
||||
@ -2742,7 +2742,7 @@ class BenchmarkRunner:
|
||||
try:
|
||||
shutil.move("repro.py", f"{repro_dir}/{name}_repro.py")
|
||||
except OSError:
|
||||
log.exception("Could not find repro script for model %s", name)
|
||||
log.error("Could not find repro script for model %s", name)
|
||||
else:
|
||||
log.info(
|
||||
"Repro script for model %s with minified graph saved to %s",
|
||||
|
@ -197,7 +197,6 @@ select = [
|
||||
"TC",
|
||||
"TRY002", # ban vanilla raise (todo fix NOQAs)
|
||||
"TRY203",
|
||||
"TRY400", # use logging.exception
|
||||
"TRY401", # verbose-log-message
|
||||
"UP",
|
||||
"YTT",
|
||||
|
@ -47,15 +47,11 @@ def requirements_installed() -> bool:
|
||||
|
||||
return True
|
||||
except ImportError:
|
||||
logger.error( # noqa: TRY400
|
||||
"Requirements not installed, run the following command to install:",
|
||||
exc_info=False,
|
||||
logger.error(
|
||||
"Requirements not installed, run the following command to install:"
|
||||
)
|
||||
logger.error( # noqa: TRY400
|
||||
" > %s -m pip install -r %s/requirements.txt",
|
||||
sys.executable,
|
||||
ROOT_PATH,
|
||||
exc_info=False,
|
||||
logger.error(
|
||||
" > %s -m pip install -r %s/requirements.txt", sys.executable, ROOT_PATH
|
||||
)
|
||||
return False
|
||||
|
||||
|
@ -138,7 +138,7 @@ def wrap_compiler_debug(
|
||||
example_inputs,
|
||||
compiler_name,
|
||||
)
|
||||
log.exception("CompilerError")
|
||||
log.error("CompilerError")
|
||||
raise
|
||||
|
||||
# We may run regular PyTorch compute that may trigger Dynamo, do NOT
|
||||
|
@ -2148,7 +2148,7 @@ def torchscript(model, example_inputs, verbose=False):
|
||||
if verbose:
|
||||
log.exception("jit error")
|
||||
else:
|
||||
log.error("Both torch.jit.trace and torch.jit.script failed") # noqa: TRY400
|
||||
log.error("Both torch.jit.trace and torch.jit.script failed")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -359,7 +359,7 @@ class Guard:
|
||||
except Exception:
|
||||
log.exception("Error while creating guard:\n%s", str(self).rstrip())
|
||||
if self.stack:
|
||||
log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip()) # noqa: TRY400
|
||||
log.error("Created at:\n%s", "".join(self.stack.format()[-4:]).rstrip())
|
||||
raise
|
||||
|
||||
def is_specialized_nn_module(self):
|
||||
|
@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
|
||||
major, minor = torch.cuda.get_device_capability(0)
|
||||
return str(major * 10 + minor)
|
||||
return str(cuda_arch)
|
||||
except Exception:
|
||||
log.exception("Error getting cuda arch")
|
||||
except Exception as e:
|
||||
log.error("Error getting cuda arch: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
@ -35,8 +35,8 @@ def get_cuda_version() -> Optional[str]:
|
||||
if cuda_version is None:
|
||||
cuda_version = torch.version.cuda
|
||||
return cuda_version
|
||||
except Exception:
|
||||
log.exception("Error getting cuda version")
|
||||
except Exception as e:
|
||||
log.error("Error getting cuda version: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
|
@ -181,7 +181,7 @@ def _fx_compile_mode_default() -> tuple[FxCompileMode, bool]:
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.error( # noqa: TRY400
|
||||
log.error(
|
||||
"Invalid value of %s for %s. Expected one of %s. Using default.",
|
||||
value,
|
||||
name,
|
||||
|
@ -796,13 +796,13 @@ def create_node_mapping(
|
||||
except Exception as e:
|
||||
# Since this is just logging code, it should never interfere with regular
|
||||
# program execution, so we use this try-except to guard against any error
|
||||
log.error("Unexpected error in create_node_mapping: %s", e) # noqa: TRY400
|
||||
log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json) # noqa: TRY400
|
||||
log.error( # noqa: TRY400
|
||||
log.error("Unexpected error in create_node_mapping: %s", e)
|
||||
log.error("post_to_pre_grad_nodes_json: %s", post_to_pre_grad_nodes_json)
|
||||
log.error(
|
||||
"triton_kernel_to_post_grad_json: %s", triton_kernel_to_post_grad_json
|
||||
)
|
||||
log.error("pre_grad_graph_id: %s", pre_grad_graph_id) # noqa: TRY400
|
||||
log.error(traceback.format_exc()) # noqa: TRY400
|
||||
log.error("pre_grad_graph_id: %s", pre_grad_graph_id)
|
||||
log.error(traceback.format_exc())
|
||||
return empty_return
|
||||
|
||||
|
||||
|
@ -718,7 +718,7 @@ class CompiledFxGraph(OutputCode):
|
||||
)
|
||||
self.compiled_fn_runner = getattr(code_cache, "runner", None)
|
||||
except OSError:
|
||||
log.exception("Failed to load artifact: %s", artifact_path)
|
||||
log.error("Failed to load artifact: %s", artifact_path)
|
||||
raise
|
||||
|
||||
return artifact_path
|
||||
|
@ -2238,9 +2238,9 @@ class AlgorithmSelectorCache(PersistentCache):
|
||||
try:
|
||||
timing = cls.benchmark_choice(choice, autotune_args)
|
||||
except CUDACompileError as e:
|
||||
log.error( # noqa: TRY400
|
||||
log.error(
|
||||
"CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
|
||||
e,
|
||||
str(e),
|
||||
)
|
||||
timing = float("inf")
|
||||
except NotImplementedError as e:
|
||||
@ -2253,7 +2253,7 @@ class AlgorithmSelectorCache(PersistentCache):
|
||||
else:
|
||||
if "illegal memory access" in msg:
|
||||
msg += "\n\nEither error in template or triton bug.\n"
|
||||
log.error( # noqa: TRY400
|
||||
log.error(
|
||||
"Runtime error during autotuning: \n%s. \nIgnoring this choice.",
|
||||
msg,
|
||||
)
|
||||
|
@ -60,7 +60,7 @@ def _orthogonalize_gram_schmidt(matrices, epsilon=0):
|
||||
try:
|
||||
col /= torch.norm(col, dim=1, keepdim=True)
|
||||
except ZeroDivisionError:
|
||||
logger.exception(
|
||||
logger.error(
|
||||
"The matrices to be orthogonalized has at least a column of all 0s. Please set a small value such as 1e-8 "
|
||||
"as `orthogonalization_epsilon` in PowerSGD state."
|
||||
)
|
||||
|
@ -235,7 +235,9 @@ class _AsyncCheckpointProcess:
|
||||
f"Submitted checkpoint save request for checkpoint_id={obj.checkpoint_request_id}" # noqa: G004
|
||||
)
|
||||
except BaseException as e:
|
||||
logger.exception("Checkpoint background process encountered an exception")
|
||||
logger.error(
|
||||
f"Checkpoint background process encountered an exception: {e}" # noqa: G004
|
||||
)
|
||||
parent_conn.send(e)
|
||||
raise
|
||||
finally:
|
||||
|
@ -90,7 +90,7 @@ def _dcp_method_logger(
|
||||
msg_dict["event"] = "exception"
|
||||
msg_dict["error"] = f"{error}"
|
||||
msg_dict["time"] = time.time_ns()
|
||||
_dcp_logger.error(msg_dict) # noqa: TRY400
|
||||
_dcp_logger.error(msg_dict)
|
||||
raise
|
||||
|
||||
# end event
|
||||
|
@ -141,7 +141,7 @@ class TailLog:
|
||||
try:
|
||||
f.result()
|
||||
except Exception as e:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"error in log tailor for %s%s. %s: %s",
|
||||
self._name,
|
||||
local_rank,
|
||||
|
@ -1419,7 +1419,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
|
||||
# do the communication
|
||||
_wait_batch_p2p(_batch_p2p(ops))
|
||||
except Exception as e:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"[Rank %s] pipeline schedule %s caught the following exception \
|
||||
at time_step %s when running action %s",
|
||||
self.rank,
|
||||
@ -1427,7 +1427,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
|
||||
time_step,
|
||||
action,
|
||||
)
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"%s",
|
||||
_format_pipeline_order(
|
||||
self.pipeline_order, error_step_number=time_step
|
||||
@ -1739,7 +1739,7 @@ class _PipelineScheduleRuntime(PipelineScheduleMulti):
|
||||
else:
|
||||
raise ValueError(f"{action=} is unknown or unsupported")
|
||||
except Exception as e:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"_PipelineScheduleRuntime caught exception at step %s when running action %s. Full Schedule:",
|
||||
time_step,
|
||||
action,
|
||||
|
@ -31,7 +31,7 @@ def _group_membership_management(store, name, is_join):
|
||||
try:
|
||||
store.wait([returned])
|
||||
except RuntimeError:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"Group membership token %s timed out waiting for %s to be released.",
|
||||
my_token,
|
||||
returned,
|
||||
|
@ -297,7 +297,7 @@ def _barrier(worker_names):
|
||||
try:
|
||||
_all_gather(None, set(worker_names))
|
||||
except RuntimeError as ex:
|
||||
logger.error("Failed to complete barrier, got error %s", ex) # noqa: TRY400
|
||||
logger.error("Failed to complete barrier, got error %s", ex)
|
||||
|
||||
|
||||
@_require_initialized
|
||||
@ -312,7 +312,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
|
||||
try:
|
||||
_all_gather(None, timeout=timeout)
|
||||
except RuntimeError as ex:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
|
||||
)
|
||||
raise ex
|
||||
|
@ -1135,7 +1135,7 @@ def _log_export_wrapper(fn):
|
||||
error_type = t.__module__ + "." + t.__qualname__
|
||||
case_name = get_class_if_classified_error(e)
|
||||
if case_name is not None:
|
||||
log.error(exportdb_error_message(case_name)) # noqa: TRY400
|
||||
log.error(exportdb_error_message(case_name))
|
||||
log_export_usage(
|
||||
event="export.error.classified",
|
||||
type=error_type,
|
||||
|
@ -312,7 +312,7 @@ def record_shapeenv_event(
|
||||
if not shape_env.should_record_events or shape_env.is_recording:
|
||||
# If ShapeEnv is disabled or already recording an event, re-raise the exception without logging.
|
||||
raise
|
||||
log.error( # noqa: G201, TRY400
|
||||
log.error( # noqa: G201
|
||||
"failed while running %s(*%s, **%s)",
|
||||
name,
|
||||
args[1:],
|
||||
@ -349,7 +349,7 @@ def replay_shape_env_events(events):
|
||||
# change after each event is replayed.
|
||||
event.run(shape_env)
|
||||
except Exception:
|
||||
log.error("failed when running event: %s", event) # noqa: TRY400
|
||||
log.error("failed when running event: %s", event)
|
||||
raise
|
||||
|
||||
return shape_env
|
||||
|
@ -756,7 +756,7 @@ class MultiProcessTestCase(TestCase):
|
||||
)
|
||||
sys.exit(TEST_SKIPS["generic"].exit_code)
|
||||
except Exception:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"Caught exception: \n%s exiting " "process %s with exit code: %s",
|
||||
traceback.format_exc(),
|
||||
self.rank,
|
||||
@ -791,7 +791,7 @@ class MultiProcessTestCase(TestCase):
|
||||
pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
|
||||
pipes.append((i, pipe))
|
||||
except ConnectionError as e:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"Encountered error while trying to get traceback for process %s: %s",
|
||||
i,
|
||||
e,
|
||||
@ -818,7 +818,7 @@ class MultiProcessTestCase(TestCase):
|
||||
"Could not retrieve traceback for timed out process: %s", rank
|
||||
)
|
||||
except ConnectionError as e:
|
||||
logger.error( # noqa: TRY400
|
||||
logger.error(
|
||||
"Encountered error while trying to get traceback for process %s: %s",
|
||||
rank,
|
||||
e,
|
||||
|
Reference in New Issue
Block a user