Enable all flake8-logging-format rules (#164655)

These rules are enabled by removing the existing suppressions: G100, G101, and G200 are dropped from the `.flake8` ignore list (and G101 from the `pyproject.toml` ruff ignore list), handlers that logged a caught exception by hand are switched to `logger.exception()` or `exc_info=True` so the traceback is preserved, and the remaining intentional call sites are annotated with `# noqa: G200`.
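
For context on the recurring pattern: G200 flags logging calls that interpolate the caught exception into the message, which records only str(e) and drops the traceback. A minimal sketch of the before/after idioms applied throughout this commit (`risky_operation` is a hypothetical stand-in for the failing work):

    import logging

    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger(__name__)

    def risky_operation() -> None:
        # Hypothetical stand-in for work that may fail.
        raise OSError("disk full")

    # Before (violates G200): only str(e) is logged; the traceback is lost.
    try:
        risky_operation()
    except OSError as e:
        log.error("Operation failed: %s", e)

    # After: log.exception() logs at ERROR level and appends the traceback.
    try:
        risky_operation()
    except OSError:
        log.exception("Operation failed")

    # After, at a non-error level: exc_info=True attaches the same traceback.
    try:
        risky_operation()
    except OSError:
        log.warning("Operation failed", exc_info=True)

    # Where interpolating the exception is deliberate, the rule is
    # suppressed at the call site instead:
    try:
        risky_operation()
    except OSError as e:
        log.info("Retrying because: %s", e)  # noqa: G200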

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164655
Approved by: https://github.com/janeyx99, https://github.com/mlazos
Yuanyuan Chen
2025-10-19 00:59:28 +00:00
committed by PyTorch MergeBot
parent c4f6619330
commit 3255e7872b
55 changed files with 131 additions and 140 deletions

View File

@@ -57,8 +57,8 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
         logger.info("Successfully cloned %s", target)
         return r, commit
-    except GitCommandError as e:
-        logger.error("Git operation failed: %s", e)
+    except GitCommandError:
+        logger.exception("Git operation failed")
         raise

View File

@@ -13,8 +13,6 @@ ignore =
     EXE001,
     # these ignores are from flake8-bugbear; please fix!
     B007,B008,B017,B019,B023,B028,B903,B905,B906,B907,B908,B910
-    # these ignores are from flake8-logging-format; please fix!
-    G100,G101,G200
     # these ignores are from flake8-simplify. please fix or ignore with commented reason
     SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
     # SIM104 is already covered by pyupgrade ruff

View File

@@ -1751,8 +1751,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
                     f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
                 )
             )
-        except Exception as e:
-            log.error("Failed to save memory snapshot, %s", e)
+        except Exception:
+            log.exception("Failed to save memory snapshot")
 
         torch.cuda.memory._record_memory_history(enabled=None)

View File

@@ -296,8 +296,8 @@ class OperatorInputsLoader:
         for key in self.operator_db.keys():
             try:
                 op = eval(key)
-            except AttributeError as ae:
-                log.warning("Evaluating an op name into an OpOverload: %s", ae)
+            except AttributeError:
+                log.warning("Evaluating an op name into an OpOverload", exc_info=True)
                 continue
             yield op

View File

@@ -159,8 +159,6 @@ ignore = [
    "EXE001",
    "F405",
    "FURB122", # writelines
-    # these ignores are from flake8-logging-format; please fix!
-    "G101",
    # these ignores are from ruff NPY; please fix!
    "NPY002",
    # these ignores are from ruff PERF; please fix!

View File

@@ -72,7 +72,7 @@ try:
 except ImportError as e:
     # In FBCode we separate FX out into a separate target for the sake of dev
     # velocity. These are covered by a separate test target `quantization_fx`
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 # PyTorch 2 Export Quantization
 try:
@@ -94,7 +94,7 @@ try:
 except ImportError as e:
     # In FBCode we separate PT2 out into a separate target for the sake of dev
     # velocity. These are covered by a separate test target `quantization_pt2e`
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 try:
     from quantization.fx.test_numeric_suite_fx import TestFXGraphMatcher  # noqa: F401
@@ -103,7 +103,7 @@ try:
     from quantization.fx.test_numeric_suite_fx import TestFXNumericSuiteNShadows  # noqa: F401
     from quantization.fx.test_numeric_suite_fx import TestFXNumericSuiteCoreAPIsModels  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 # Test the model report module
 try:
@@ -115,19 +115,19 @@ try:
     from quantization.fx.test_model_report_fx import TestFxDetectOutliers  # noqa: F401
     from quantization.fx.test_model_report_fx import TestFxModelReportVisualizer  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 # Equalization for FX mode
 try:
     from quantization.fx.test_equalize_fx import TestEqualizeFx  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 # Backward Compatibility. Tests serialization and BC for quantized modules.
 try:
     from quantization.bc.test_backward_compatibility import TestSerialization  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 # JIT Graph Mode Quantization
 from quantization.jit.test_quantize_jit import TestQuantizeJit  # noqa: F401
@@ -146,29 +146,29 @@ from quantization.ao_migration.test_ao_migration import TestAOMigrationNNIntrins
 try:
     from quantization.ao_migration.test_quantization_fx import TestAOMigrationQuantizationFx  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 # Experimental functionality
 try:
     from quantization.core.experimental.test_bits import TestBitsCPU  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 try:
     from quantization.core.experimental.test_bits import TestBitsCUDA  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 try:
     from quantization.core.experimental.test_floatx import TestFloat8DtypeCPU  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 try:
     from quantization.core.experimental.test_floatx import TestFloat8DtypeCUDA  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 try:
     from quantization.core.experimental.test_floatx import TestFloat8DtypeCPUOnlyCPU  # noqa: F401
 except ImportError as e:
-    log.warning(e)
+    log.warning(e)  # noqa:G200
 
 if __name__ == '__main__':
     run_tests()

View File

@@ -73,7 +73,7 @@ def run_command(
         if remaining_retries == 0:
             raise err
         remaining_retries -= 1
-        logging.warning(
+        logging.warning(  # noqa: G200
             "(%s/%s) Retrying because command failed with: %r",
             retries - remaining_retries,
             retries,

View File

@@ -172,7 +172,7 @@ def run_command(
         ):
             raise err
         remaining_retries -= 1
-        logging.warning(
+        logging.warning(  # noqa: G200
             "(%s/%s) Retrying because command failed with: %r",
             retries - remaining_retries,
             retries,

View File

@@ -112,7 +112,7 @@ def run_command(
         if remaining_retries == 0:
             raise err
         remaining_retries -= 1
-        logging.warning(
+        logging.warning(  # noqa: G200
             "(%s/%s) Retrying because command failed with: %r",
             retries - remaining_retries,
             retries,

View File

@@ -95,8 +95,8 @@ Deleting %s just to be safe.
         try:
             binary_path.unlink()
-        except OSError as e:
-            logging.critical("Failed to delete binary: %s", e)
+        except OSError:
+            logging.critical("Failed to delete binary", exc_info=True)
         logging.critical(
             "Delete this binary as soon as possible and do not execute it!"
         )

View File

@@ -114,7 +114,7 @@ def _find_manylinux_interpreters() -> list[str]:
             )
         except subprocess.CalledProcessError as e:
-            logger.debug("Failed to get version for %s: %s", python_path, e)
+            logger.debug("Failed to get version for %s: %s", python_path, e)  # noqa:G200
             continue
     return interpreters

View File

@@ -1215,7 +1215,7 @@ def compile_frame(  # type: ignore[return]
     except exc.SkipFrame as e:
         if not isinstance(e, exc.TensorifyScalarRestartAnalysis):
             TensorifyState.clear()
-        log.debug(
+        log.debug(  # noqa: G200
             "Skipping frame %s %s \
 %s %s",
             e,

View File

@@ -753,8 +753,10 @@ class _TorchDynamoContext:
                         fn, result.dynamo, ignore_inlined_sources=False
                     )
                     self._package.install(result.backends)
-                except RuntimeError as e:
-                    log.warning("Failed to load entry from dynamo cache: %s", e)
+                except RuntimeError:
+                    log.warning(
+                        "Failed to load entry from dynamo cache", exc_info=True
+                    )
                     self._package.initialize(fn, None, ignore_inlined_sources=False)
 
         fn = innermost_fn(fn)

View File

@@ -532,8 +532,8 @@ def _load_gb_type_to_gb_id_map() -> dict[str, Any]:
         )
         with open(registry_path) as f:
             registry = json.load(f)
-    except Exception as e:
-        log.error("Error accessing the registry file: %s", e)
+    except Exception:
+        log.exception("Error accessing the registry file")
         registry = {}
 
     mapping = {}

View File

@@ -269,7 +269,7 @@ class GraphRegionTracker:
                 duplicates.append(node)
                 self.node_to_duplicates[node] = duplicates
             except NodeHashException as e:
-                log.debug("Unable to hash node %s with exception %s", node, e)
+                log.debug("Unable to hash node %s with exception %s", node, e)  # noqa: G200
 
     def track_node_mutations(
         self,

View File

@@ -1122,9 +1122,9 @@ class DiskDynamoCache(DiskDynamoStore):
                 result = super().load_cache_entry(key)
                 counters["dynamo_cache"]["dynamo_cache_hit"] += 1
                 return result
-            except Exception as e:
+            except Exception:
                 counters["dynamo_cache"]["dynamo_cache_error"] += 1
-                logger.warning("Failed to load package from path %s: %s", path, str(e))
+                logger.warning("Failed to load package from path %s", path, exc_info=True)
                 return None
         logger.info("No package found for %s", key)
         counters["dynamo_cache"]["dynamo_cache_miss"] += 1

View File

@@ -203,7 +203,7 @@ class PrecompileContext:
                 if result is not None:
                     precompile_cache_entries[key] = result
             except Exception as e:
-                logger.warning("Failed to create cache entry %s: %s", key, str(e))
+                logger.warning("Failed to create cache entry %s", key, exc_info=True)
                 error = e
 
         data = json.dumps(

View File

@@ -1041,7 +1041,7 @@ class BuiltinVariable(VariableTracker):
             except TypeError as e:
                 has_constant_handler = obj.has_constant_handler(args, kwargs)
                 if not has_constant_handler:
-                    log.warning(
+                    log.warning(  # noqa: G200
                         "incorrect arg count %s %s and no constant handler",
                         self_handler,
                         e,
@@ -1560,9 +1560,9 @@ class BuiltinVariable(VariableTracker):
         try:
             # Only supports certain function types
             user_func_variable = variables.UserFunctionVariable(bound_method)
-        except AssertionError as e:
+        except AssertionError:
             # Won't be able to do inline the str method, return to avoid graph break
-            log.warning("Failed to create UserFunctionVariable: %s", e)
+            log.warning("Failed to create UserFunctionVariable", exc_info=True)
             return
 
         # Inline the user function
View File

@@ -1183,7 +1183,7 @@ def speculate_subgraph(
                 f"fall back to eager-mode PyTorch, which could lead to a slowdown."
             )
             log.info(msg)
-            log.info(ex)
+            log.info(ex)  # noqa: G200
             raise ex

View File

@@ -1221,7 +1221,7 @@ class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]):
         except Exception as e:
             cache_key = None
             counters["aot_autograd"]["autograd_cache_bypass"] += 1
-            log.info("Bypassing autograd cache due to: %s", e)
+            log.info("Bypassing autograd cache due to: %s", e)  # noqa: G200
             cache_state = "bypass"
             cache_event_time = time.time_ns()
             cache_info["cache_bypass_reason"] = str(e)
@@ -1368,7 +1368,7 @@ class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]):
                 ),
             )
         except Exception as e:
-            log.info("AOTAutograd cache unable to load compiled graph: %s", e)
+            log.info("AOTAutograd cache unable to load compiled graph: %s", e)  # noqa: G200
             if config.strict_autograd_cache:
                 raise e
         if entry is not None:
@@ -1414,12 +1414,12 @@ class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]):
             counters["aot_autograd"]["autograd_cache_saved"] += 1
         except BypassAOTAutogradCache as e:
             counters["aot_autograd"]["autograd_cache_bypass"] += 1
-            log.info("Bypassing autograd cache due to: %s", e)
+            log.info("Bypassing autograd cache due to: %s", e)  # noqa: G200
             if remote:
                 log_cache_bypass("bypass_aot_autograd", str(e))
             return None
         except Exception as e:
-            log.info("AOTAutograd cache unable to serialize compiled graph: %s", e)
+            log.info("AOTAutograd cache unable to serialize compiled graph: %s", e)  # noqa: G200
             if remote:
                 log_cache_bypass(
                     "bypass_aot_autograd", "Unable to serialize: " + str(e)

View File

@@ -1516,7 +1516,7 @@ class FxGraphCache(GuardedCache[CompiledFxGraph]):
             )
         except BypassFxGraphCache as e:
             counters["inductor"]["fxgraph_cache_bypass"] += 1
-            log.info("Bypassing FX Graph Cache because '%s'", e)
+            log.info("Bypassing FX Graph Cache because '%s'", e)  # noqa: G200
             if remote:
                 log_cache_bypass("bypass_fx_graph", str(e))
         cache_info = {

View File

@@ -2493,7 +2493,7 @@ class KernelTemplate:
                 choices.append(self.generate(**kwargs))
                 return None
             except NotImplementedError as e:
-                log.info(
+                log.info(  # noqa: G200
                     "Cannot Append Choice: %s. KernelTemplate type is %s",
                     e,
                     type(self),

View File

@@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
             major, minor = torch.cuda.get_device_capability(0)
             return str(major * 10 + minor)
         return str(cuda_arch)
-    except Exception as e:
-        log.error("Error getting cuda arch: %s", e)
+    except Exception:
+        log.exception("Error getting cuda arch")
         return None
@@ -45,8 +45,8 @@ def get_cuda_version() -> Optional[str]:
         if cuda_version is None:
             cuda_version = torch.version.cuda
         return cuda_version
-    except Exception as e:
-        log.error("Error getting cuda version: %s", e)
+    except Exception:
+        log.exception("Error getting cuda version")
         return None

View File

@@ -94,11 +94,11 @@ def maybe_fetch_ops() -> Optional[list[Any]]:
             assert isinstance(serialized_ops, list), (
                 f"Expected serialized ops is a list, got {type(serialized_ops)}"
             )
-        except Exception as e:
+        except Exception:
             log.warning(
-                "Failed to load CUTLASS config %s from local cache: %s",
+                "Failed to load CUTLASS config %s from local cache",
                 filename,
-                e,
+                exc_info=True,
             )
             serialized_ops = None
     elif config.is_fbcode():

View File

@@ -53,8 +53,8 @@ def move_cutlass_compiled_cache() -> None:
         filename = os.path.basename(cutlass_cppgen.CACHE_FILE)
         shutil.move(cutlass_cppgen.CACHE_FILE, os.path.join(cache_dir(), filename))
         log.debug("Moved CUTLASS compiled cache file to %s", cache_dir())
-    except OSError as e:
-        log.warning("Failed to move CUTLASS compiled cache file: %s", e)
+    except OSError:
+        log.warning("Failed to move CUTLASS compiled cache file", exc_info=True)
 
 
 def _rename_cutlass_import(content: str, cutlass_modules: list[str]) -> str:
@@ -79,7 +79,7 @@ def try_import_cutlass() -> bool:
             import cutlass_cppgen  # type: ignore[import-not-found]  # noqa: F401
             import cutlass_library  # type: ignore[import-not-found]
         except ImportError as e:
-            log.warning(
+            log.warning(  # noqa: G200
                 "Failed to import CUTLASS packages in fbcode: %s, ignoring the CUTLASS backend.",
                 str(e),
             )
@@ -164,7 +164,7 @@ def try_import_cutlass() -> bool:
             return True
         except ImportError as e:
-            log.debug(
+            log.debug(  # noqa: G200
                 "Failed to import CUTLASS packages: %s, ignoring the CUTLASS backend.",
                 str(e),
             )

View File

@@ -58,10 +58,10 @@ class CuteDSLTemplate(KernelTemplate):
                 choices.append(self.generate(**kwargs))
                 return None
             except NotImplementedError as e:
-                log.debug("CuteDSL template choice generation failed: %s", e)
+                log.debug("CuteDSL template choice generation failed: %s", e)  # noqa: G200
                 return e
             except Exception as e:
-                log.debug("CuteDSL template choice generation error: %s", e)
+                log.debug("CuteDSL template choice generation error: %s", e)  # noqa: G200
                 return NotImplementedError(f"CuteDSL template failed: {e}")
 
     def generate(self, **kwargs: Any) -> ChoiceCaller:

View File

@@ -510,7 +510,7 @@ class CKGemmTemplate(CKTemplate):
                 torch.cuda.get_device_properties(X_meta.device).warp_size,
             )
         except Exception as e:
-            log.debug(
+            log.debug(  # noqa: G200
                 "Failed to prefetch_stages for %s with exception %s", op.name, e
             )
             # be conservative here and disable the op

View File

@@ -5638,7 +5638,7 @@ class TritonScheduling(SIMDScheduling):
         except Exception as e:
             if config.triton.disallow_failing_autotune_kernels_TESTING_ONLY:
                 raise
-            log.debug(
+            log.debug(  # noqa: G200
                 "Exception (%s) in compiling fused nodes %s",
                 e,
                 node_names,

View File

@@ -204,7 +204,7 @@ def estimate_nccl_collective_runtime_nccl_estimator(snode) -> Optional[float]:
             torch.ops._c10d_functional.wait_tensor.default(w)
     except Exception as e:
         # NCCL estimator can fail
-        log.info(e)
+        log.info(e)  # noqa: G200
         return None
 
     est_time_us = time_estimator.estimated_time

View File

@@ -445,7 +445,7 @@ class _SerializedFxCompile(FxCompile):
             # we can't cache (or serialize)
             FxGraphCache._check_for_hop(gm)
         except BypassFxGraphCache as e:
-            log.debug("Skipping %s compile: %s", type(self), e)
+            log.debug("Skipping %s compile: %s", type(self), e)  # noqa: G200
             return None
 
         context = torch._guards.TracingContext.try_get()

View File

@@ -284,8 +284,8 @@ class SubprocPool:
                 self.process.wait(300)
             if self.log_file:
                 self.log_file.close()
-        except OSError as e:
-            log.warning("Ignored OSError in pool shutdown: %s", e)
+        except OSError:
+            log.warning("Ignored OSError in pool shutdown", exc_info=True)
         finally:
             with self.futures_lock:
                 for future in self.pending_futures.values():

View File

@@ -207,7 +207,7 @@ def numeric_check_if_enabled(
             precision=precision,
         )
     except Exception as e:
-        logger.warning(
+        logger.warning(  # noqa: G200
             "Runtime numeric check failed in pre grad fx passes with error: %s", e
         )
         traceback.print_exc()

View File

@@ -913,8 +913,8 @@ def reorder_for_peak_memory(
     try:
         validate_graph_acyclic(nodes)
         validate_unique_buffer_names(nodes, name_to_buf, name_to_freeable_input_buf)
-    except RuntimeError as e:
-        torch_log.error("Memory planning validation failed: %s", e)
+    except RuntimeError:
+        torch_log.exception("Memory planning validation failed")
         if not is_fbcode():  # TODO: remove after ensuring OSS side is safe
             raise
@@ -942,8 +942,8 @@ def reorder_for_peak_memory(
                 PeakMemoryResult(order, peak_memory, method.__name__)
             )
             torch_log.info("%s peak memory: %d", method.__name__, peak_memory)
-        except Exception as e:
-            torch_log.error("Failed to reorder for %s: %s", method.__name__, e)
+        except Exception:
+            torch_log.exception("Failed to reorder for %s", method.__name__)
             if not is_fbcode():  # TODO: remove after ensuring OSS side is safe
                 raise

View File

@@ -238,7 +238,7 @@ class CoordescTuner:
         try:
             candidate_timing = self.call_func(func, candidate_config)
         except Exception as e:
-            log.debug("Got exception %s", e)
+            log.debug("Got exception %s", e)  # noqa: G200
             return False, float("inf")
 
         if self.has_improvement(best_timing, candidate_timing):

View File

@@ -1618,7 +1618,7 @@ class StaticTritonCompileResult(CompileResult[StaticallyLaunchedCudaKernel]):
             result = check_can_launch()
             return result
         except CannotStaticallyLaunchKernel as e:
-            log.info("Bypassing StaticallyLaunchedCudaKernel due to %s", str(e))
+            log.info("Bypassing StaticallyLaunchedCudaKernel due to %s", str(e))  # noqa: G200
             if torch._inductor.config.strict_static_cuda_launcher:
                 raise e
             return None
@@ -1997,11 +1997,11 @@ def end_graph(output_file):
            )
            file.write(bw_info_str + "\n")
            file.write(f"{summary_str}\n\n")
-    except Exception as e:
+    except Exception:
        log.warning(
-            "failed to write profile bandwidth result into %s: %s",
+            "failed to write profile bandwidth result into %s",
            output_file,
-            e,
+            exc_info=True,
        )

View File

@@ -896,11 +896,11 @@ class BaseSchedulerNode:
             except ValueError as e:
                 # We don't know how to estimate runtime for this collective,
                 # falling back to 0
-                log.info(e)
+                log.info(e)  # noqa: G200
                 return 0
             except TypeError as e:
                 # this happens when the collective is not of type ir._CollectiveKernel
-                log.info(e)
+                log.info(e)  # noqa: G200
                 return 0
 
         elif is_wait(self.node):
@@ -3366,7 +3366,7 @@ class Scheduler:
                 future.result()
             except Exception as e:
                 if fusion_log.isEnabledFor(logging.DEBUG):
-                    fusion_log.debug(
+                    fusion_log.debug(  # noqa: G200
                         "Exception in compiling %s: %s",
                         "prologue" if not epilogue_fusion else "epilogue",
                         str(e),
@@ -3442,7 +3442,7 @@ class Scheduler:
             # triton will unpredictably error with valid prologue fusions
             except Exception as e:
                 if fusion_log.isEnabledFor(logging.DEBUG):
-                    fusion_log.debug(
+                    fusion_log.debug(  # noqa: G200
                         "Exception in compiling %s: %s",
                         "prologue" if not epilogue_fusion else "epilogue",
                         str(e),

View File

@@ -1702,7 +1702,7 @@ class TritonTemplate(KernelTemplate):
                 choices.append(choice)
                 return None
             except NotImplementedError as e:
-                log.info(
+                log.info(  # noqa: G200
                     "Cannot Append Choice: %s. KernelTemplate type is %s",
                     e,
                     type(self),
@@ -3223,17 +3223,16 @@ class AlgorithmSelectorCache(PersistentCache):
         for choice in choices:
             try:
                 timing = cls.benchmark_choice(choice, autotune_args)
-            except CUDACompileError as e:
+            except CUDACompileError:
                 from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
 
                 if not isinstance(choice, CUDATemplateCaller):
-                    log.error(
-                        "CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
-                        e,
+                    log.exception(
+                        "CUDA compilation error during autotuning: \nIgnoring this choice."
                     )
                 timing = float("inf")
-            except NotImplementedError as e:
-                log.warning("Not yet implemented: %s", e)
+            except NotImplementedError:
+                log.warning("Not yet implemented", exc_info=True)
                 timing = float("inf")
             except RuntimeError as e:
                 from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
@@ -3266,7 +3265,7 @@ class AlgorithmSelectorCache(PersistentCache):
                 from triton.runtime.autotuner import OutOfResources
 
                 if isinstance(e, OutOfResources):
-                    log.warning(e)
+                    log.warning(e)  # noqa: G200
                     timing = float("inf")
                 else:
                     raise e

View File

@@ -224,11 +224,11 @@ class TritonBundler:
                     # Make sure the cubin path exists and is valid
                     for compile_result in result.kernel.compile_results:
                         compile_result.reload_cubin_path()
-                except RuntimeError as e:
+                except RuntimeError:
                     log.warning(
-                        "Failed to reload cubin file statically launchable autotuner %s: %s",
+                        "Failed to reload cubin file statically launchable autotuner %s",
                         result.kernel_name,
-                        e,
+                        exc_info=True,
                     )
                     continue
             # We make a future instead of returning the kernel here so that

View File

@@ -21,7 +21,7 @@ class FakeScriptObject:
             with _disable_current_modes():
                 self.real_obj = copy.deepcopy(x)
         except RuntimeError as e:
-            log.warning(
+            log.warning(  # noqa: G200
                 "Unable to deepcopy the custom object %s due to %s. "
                 "Defaulting to the user given object. This might be "
                 "dangerous as side effects may be directly applied "

View File

@@ -2568,7 +2568,7 @@ class FakeTensorMode(TorchDispatchMode):
                 # we shouldn't broadly catch all errors here;
                 # some come from real-kernel mutation/aliasing checks we want to run.
                 # add more exception types as needed.
-                log.debug(
+                log.debug(  # noqa: G200
                     "real-tensor fallback failed for %s: %s; silently ignoring",
                     func,
                     exc,

View File

@@ -224,7 +224,7 @@ class CheckpointProcess:
                 )
             )
             parent_pipe.close()
-            logger.error("Subprocess terminated due to exception: %s", e)
+            logger.exception("Subprocess terminated due to exception")
 
     def _send(self, request_type: RequestType, payload: dict[str, Any]) -> None:
         try:
@@ -238,8 +238,8 @@ class CheckpointProcess:
             )
         except OSError as e:
             error_msg = "Child process terminated unexpectedly"
-            logger.error(
-                "Communication failed during %s request: %s", request_type.value, e
+            logger.exception(
+                "Communication failed during %s request", request_type.value
             )
             raise RuntimeError(error_msg) from e
@@ -354,10 +354,8 @@ class CheckpointProcess:
                 )
                 self.process.processes[0].kill()
                 logger.info("Subprocess killed forcefully")
-            except ProcessExitedException as e:
-                logger.error(
-                    "ProcessExitedException during subprocess termination: %s", e
-                )
+            except ProcessExitedException:
+                logger.exception("ProcessExitedException during subprocess termination")
                 raise
 
         logger.debug("CheckpointProcess closed successfully")

View File

@@ -972,7 +972,7 @@ def _store_based_barrier(
         except RuntimeError as e:
             worker_count = store.add(store_key, 0)
             # Print status periodically to keep track.
-            logger.debug(
+            logger.debug(  # noqa: G200
                 "Waiting in store based barrier to initialize process group for %s seconds"
                 "rank: %s, key: %s (world_size=%s, num_workers_joined=%s, timeout=%s error=%s)",
                 time.time() - start,

View File

@@ -721,7 +721,7 @@ class SimpleElasticAgent(ElasticAgent):
             self._record_worker_events(result)
             return result
         except RendezvousGracefulExitError as e:
-            logger.info("Rendezvous gracefully exited: %s", e)
+            logger.info("Rendezvous gracefully exited: %s", e)  # noqa: G200
         except SignalException as e:
             logger.warning("Received %s death signal, shutting down workers", e.sigval)
             self._shutdown(e.sigval)

View File

@@ -489,11 +489,13 @@ class PContext(abc.ABC):
                 sig = getattr(signal, sig_name.strip())
                 signal.signal(sig, _terminate_process_handler)
                 logger.info("Registered signal handler for %s", sig_name)
-            except (AttributeError, ValueError) as e:
+            except (AttributeError, ValueError):
                 logger.warning(
-                    "Failed to register signal handler for %s: %s", sig_name, e
+                    "Failed to register signal handler for %s",
+                    sig_name,
+                    exc_info=True,
                 )
-            except RuntimeError as e:
+            except RuntimeError:
                 if IS_WINDOWS and sig_name.strip() in [
                     "SIGHUP",
                     "SIGQUIT",
@@ -505,7 +507,9 @@ class PContext(abc.ABC):
                     )
                 else:
                     logger.warning(
-                        "Failed to register signal handler for %s: %s", sig_name, e
+                        "Failed to register signal handler for %s",
+                        sig_name,
+                        exc_info=True,
                     )
         else:
             logger.warning(

View File

@@ -142,12 +142,11 @@ class TailLog:
             try:
                 f.result()
             except Exception as e:
-                logger.error(
-                    "error in log tailor for %s%s. %s: %s",
+                logger.exception(
+                    "error in log tailor for %s%s. %s",
                     self._name,
                     local_rank,
                     e.__class__.__qualname__,
-                    e,
                 )
 
         if self._threadpool:

View File

@@ -208,8 +208,8 @@ class EtcdRendezvousHandler(RendezvousHandler):
         try:
             self.set_closed()
             return True
-        except BaseException as e:  # noqa: B036
-            logger.warning("Shutdown failed. Error occurred: %s", str(e))
+        except BaseException:  # noqa: B036
+            logger.warning("Shutdown failed", exc_info=True)
             return False
@@ -333,7 +333,7 @@ class EtcdRendezvous:
             # to avoid spamming etcd
             # FIXME: there are a few things that fall under this like
             # etcd.EtcdKeyNotFound, etc, which could be handled more explicitly.
-            logger.info("Rendezvous attempt failed, will retry. Reason: %s", e)
+            logger.info("Rendezvous attempt failed, will retry. Reason: %s", e)  # noqa: G200
             time.sleep(1)
 
     def init_phase(self):

View File

@@ -176,7 +176,7 @@ class EtcdServer:
             except Exception as e:
                 curr_retries += 1
                 stop_etcd(self._etcd_proc)
-                logger.warning(
+                logger.warning(  # noqa: G200
                     "Failed to start etcd server, got error: %s, retrying", str(e)
                 )
                 if curr_retries >= num_retries:

View File

@@ -1734,7 +1734,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
                 # do the communication
                 _wait_batch_p2p(_batch_p2p(ops))
             except Exception as e:
-                logger.error(
+                logger.error(  # noqa: G200
                     "[Rank %s] pipeline schedule %s caught the following exception '%s' \
 at time_step %s when running action %s",
                     self.rank,

View File

@@ -295,8 +295,8 @@ def _barrier(worker_names):
     """
     try:
         _all_gather(None, set(worker_names))
-    except RuntimeError as ex:
-        logger.error("Failed to complete barrier, got error %s", ex)
+    except RuntimeError:
+        logger.exception("Failed to complete barrier")
 
 
 @_require_initialized
@@ -311,9 +311,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
     try:
         _all_gather(None, timeout=timeout)
     except RuntimeError as ex:
-        logger.error(
-            "Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
-        )
+        logger.exception("Failed to respond to 'Shutdown Proceed' in time")
         raise ex

View File

@@ -448,8 +448,8 @@ def load(
                 f,
                 expected_opset_version=expected_opset_version,
             )
-        except RuntimeError as e:
-            log.warning("Ran into the following error when deserializing: %s", e)
+        except RuntimeError:
+            log.warning("Ran into the following error when deserializing", exc_info=True)
             pt2_contents = PT2ArchiveContents({}, {}, {})
 
     if len(pt2_contents.exported_programs) > 0 or len(pt2_contents.extra_files) > 0:

View File

@@ -83,8 +83,8 @@ def is_pt2_package(serialized_model: Union[bytes, str]) -> bool:
             archive_format_path = f"{root_folder}/{ARCHIVE_FORMAT_PATH}"
             if archive_format_path in zip_reader.namelist():
                 return zip_reader.read(archive_format_path) == b"pt2"
-    except Exception as ex:
-        logger.info("Model is not a PT2 package: %s", str(ex))
+    except Exception:
+        logger.info("Model is not a PT2 package")
     return False

View File

@@ -3209,8 +3209,8 @@ class DimConstraints:
                     self._dynamic_results.add(self._dcp.doprint(arg))
             else:
                 self._dynamic_results.add(self._dcp.doprint(solution))
-        except (NotImplementedError, AssertionError) as e:
-            log.warning("Failed to reduce inequalities: %s", e)
+        except (NotImplementedError, AssertionError):
+            log.warning("Failed to reduce inequalities", exc_info=True)
             for expr2 in exprs:
                 self._dynamic_results.add(self._dcp.doprint(expr2))

View File

@@ -83,7 +83,7 @@ class OnnxDecompMeta:
                 # When the function is targeting an HOP, for example, it will accept
                 # functions as arguments and fail to generate an ONNX signature.
                 # In this case we set signature to None and dispatch to this function always.
-                logger.warning(
+                logger.warning(  # noqa: G200
                     "Failed to infer the signature for function '%s' because '%s'"
                     "All nodes targeting `%s` will be dispatched to this function",
                     self.onnx_function,

View File

@@ -317,12 +317,9 @@ class _VerificationInterpreter(torch.fx.Interpreter):
             return result
         try:
             (onnx_result,) = self._onnx_program.compute_values([node_name], self._args)
-        except Exception as e:
+        except Exception:
             logger.warning(
-                "Failed to compute value for node %s: %s",
-                node_name,
-                e,
-                exc_info=True,
+                "Failed to compute value for node %s", node_name, exc_info=True
             )
             return result
         info = VerificationInfo.from_tensors(

View File

@@ -875,7 +875,7 @@ class MultiProcessTestCase(TestCase):
         try:
             getattr(self, test_name)()
         except unittest.SkipTest as se:
-            logger.info(
+            logger.info(  # noqa: G200
                 "Process %s skipping test %s for following reason: %s",
                 self.rank,
                 test_name,
@@ -917,11 +917,10 @@ class MultiProcessTestCase(TestCase):
             try:
                 pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
                 pipes.append((i, pipe))
-            except ConnectionError as e:
-                logger.error(
-                    "Encountered error while trying to get traceback for process %s: %s",
+            except ConnectionError:
+                logger.exception(
+                    "Encountered error while trying to get traceback for process %s",
                     i,
-                    e,
                 )
 
         # Wait for results.
@@ -944,11 +943,10 @@ class MultiProcessTestCase(TestCase):
                     logger.error(
                         "Could not retrieve traceback for timed out process: %s", rank
                     )
-            except ConnectionError as e:
-                logger.error(
-                    "Encountered error while trying to get traceback for process %s: %s",
+            except ConnectionError:
+                logger.exception(
+                    "Encountered error while trying to get traceback for process %s",
                     rank,
-                    e,
                 )

     def _join_processes(self, fn) -> None: