mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 04:44:13 +08:00
Enable all flake8-logging-format rules (#164655)
These rules are enabled by removing existing suppressions.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164655
Approved by: https://github.com/janeyx99, https://github.com/mlazos
This commit is contained in:
committed by
PyTorch MergeBot
parent
c4f6619330
commit
3255e7872b
@ -57,8 +57,8 @@ def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules
|
||||
logger.info("Successfully cloned %s", target)
|
||||
return r, commit
|
||||
|
||||
except GitCommandError as e:
|
||||
logger.error("Git operation failed: %s", e)
|
||||
except GitCommandError:
|
||||
logger.exception("Git operation failed")
|
||||
raise
|
||||
|
||||
|
||||
|
2
.flake8
2
.flake8
@ -13,8 +13,6 @@ ignore =
|
||||
EXE001,
|
||||
# these ignores are from flake8-bugbear; please fix!
|
||||
B007,B008,B017,B019,B023,B028,B903,B905,B906,B907,B908,B910
|
||||
# these ignores are from flake8-logging-format; please fix!
|
||||
G100,G101,G200
|
||||
# these ignores are from flake8-simplify. please fix or ignore with commented reason
|
||||
SIM105,SIM108,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
|
||||
# SIM104 is already covered by pyupgrade ruff
|
||||
|
@ -1751,8 +1751,8 @@ def maybe_snapshot_memory(should_snapshot_memory, suffix):
|
||||
f"{output_filename.rstrip('.csv')}_{suffix}.pickle",
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
log.error("Failed to save memory snapshot, %s", e)
|
||||
except Exception:
|
||||
log.exception("Failed to save memory snapshot")
|
||||
|
||||
torch.cuda.memory._record_memory_history(enabled=None)
|
||||
|
||||
|
@ -296,8 +296,8 @@ class OperatorInputsLoader:
|
||||
for key in self.operator_db.keys():
|
||||
try:
|
||||
op = eval(key)
|
||||
except AttributeError as ae:
|
||||
log.warning("Evaluating an op name into an OpOverload: %s", ae)
|
||||
except AttributeError:
|
||||
log.warning("Evaluating an op name into an OpOverload", exc_info=True)
|
||||
continue
|
||||
yield op
|
||||
|
||||
|
@ -159,8 +159,6 @@ ignore = [
|
||||
"EXE001",
|
||||
"F405",
|
||||
"FURB122", # writelines
|
||||
# these ignores are from flake8-logging-format; please fix!
|
||||
"G101",
|
||||
# these ignores are from ruff NPY; please fix!
|
||||
"NPY002",
|
||||
# these ignores are from ruff PERF; please fix!
|
||||
|
@ -72,7 +72,7 @@ try:
|
||||
except ImportError as e:
|
||||
# In FBCode we separate FX out into a separate target for the sake of dev
|
||||
# velocity. These are covered by a separate test target `quantization_fx`
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
# PyTorch 2 Export Quantization
|
||||
try:
|
||||
@ -94,7 +94,7 @@ try:
|
||||
except ImportError as e:
|
||||
# In FBCode we separate PT2 out into a separate target for the sake of dev
|
||||
# velocity. These are covered by a separate test target `quantization_pt2e`
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
try:
|
||||
from quantization.fx.test_numeric_suite_fx import TestFXGraphMatcher # noqa: F401
|
||||
@ -103,7 +103,7 @@ try:
|
||||
from quantization.fx.test_numeric_suite_fx import TestFXNumericSuiteNShadows # noqa: F401
|
||||
from quantization.fx.test_numeric_suite_fx import TestFXNumericSuiteCoreAPIsModels # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
# Test the model report module
|
||||
try:
|
||||
@ -115,19 +115,19 @@ try:
|
||||
from quantization.fx.test_model_report_fx import TestFxDetectOutliers # noqa: F401
|
||||
from quantization.fx.test_model_report_fx import TestFxModelReportVisualizer # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
# Equalization for FX mode
|
||||
try:
|
||||
from quantization.fx.test_equalize_fx import TestEqualizeFx # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
# Backward Compatibility. Tests serialization and BC for quantized modules.
|
||||
try:
|
||||
from quantization.bc.test_backward_compatibility import TestSerialization # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
# JIT Graph Mode Quantization
|
||||
from quantization.jit.test_quantize_jit import TestQuantizeJit # noqa: F401
|
||||
@ -146,29 +146,29 @@ from quantization.ao_migration.test_ao_migration import TestAOMigrationNNIntrins
|
||||
try:
|
||||
from quantization.ao_migration.test_quantization_fx import TestAOMigrationQuantizationFx # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
# Experimental functionality
|
||||
try:
|
||||
from quantization.core.experimental.test_bits import TestBitsCPU # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
try:
|
||||
from quantization.core.experimental.test_bits import TestBitsCUDA # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
try:
|
||||
from quantization.core.experimental.test_floatx import TestFloat8DtypeCPU # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
try:
|
||||
from quantization.core.experimental.test_floatx import TestFloat8DtypeCUDA # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
try:
|
||||
from quantization.core.experimental.test_floatx import TestFloat8DtypeCPUOnlyCPU # noqa: F401
|
||||
except ImportError as e:
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa:G200
|
||||
|
||||
if __name__ == '__main__':
|
||||
run_tests()
|
||||
|
@ -73,7 +73,7 @@ def run_command(
|
||||
if remaining_retries == 0:
|
||||
raise err
|
||||
remaining_retries -= 1
|
||||
logging.warning(
|
||||
logging.warning( # noqa: G200
|
||||
"(%s/%s) Retrying because command failed with: %r",
|
||||
retries - remaining_retries,
|
||||
retries,
|
||||
|
@ -172,7 +172,7 @@ def run_command(
|
||||
):
|
||||
raise err
|
||||
remaining_retries -= 1
|
||||
logging.warning(
|
||||
logging.warning( # noqa: G200
|
||||
"(%s/%s) Retrying because command failed with: %r",
|
||||
retries - remaining_retries,
|
||||
retries,
|
||||
|
@ -112,7 +112,7 @@ def run_command(
|
||||
if remaining_retries == 0:
|
||||
raise err
|
||||
remaining_retries -= 1
|
||||
logging.warning(
|
||||
logging.warning( # noqa: G200
|
||||
"(%s/%s) Retrying because command failed with: %r",
|
||||
retries - remaining_retries,
|
||||
retries,
|
||||
|
@ -95,8 +95,8 @@ Deleting %s just to be safe.
|
||||
|
||||
try:
|
||||
binary_path.unlink()
|
||||
except OSError as e:
|
||||
logging.critical("Failed to delete binary: %s", e)
|
||||
except OSError:
|
||||
logging.critical("Failed to delete binary", exc_info=True)
|
||||
logging.critical(
|
||||
"Delete this binary as soon as possible and do not execute it!"
|
||||
)
|
||||
|
@ -114,7 +114,7 @@ def _find_manylinux_interpreters() -> list[str]:
|
||||
)
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.debug("Failed to get version for %s: %s", python_path, e)
|
||||
logger.debug("Failed to get version for %s: %s", python_path, e) # noqa:G200
|
||||
continue
|
||||
return interpreters
|
||||
|
||||
|
@ -1215,7 +1215,7 @@ def compile_frame( # type: ignore[return]
|
||||
except exc.SkipFrame as e:
|
||||
if not isinstance(e, exc.TensorifyScalarRestartAnalysis):
|
||||
TensorifyState.clear()
|
||||
log.debug(
|
||||
log.debug( # noqa: G200
|
||||
"Skipping frame %s %s \
|
||||
%s %s",
|
||||
e,
|
||||
|
@ -753,8 +753,10 @@ class _TorchDynamoContext:
|
||||
fn, result.dynamo, ignore_inlined_sources=False
|
||||
)
|
||||
self._package.install(result.backends)
|
||||
except RuntimeError as e:
|
||||
log.warning("Failed to load entry from dynamo cache: %s", e)
|
||||
except RuntimeError:
|
||||
log.warning(
|
||||
"Failed to load entry from dynamo cache", exc_info=True
|
||||
)
|
||||
self._package.initialize(fn, None, ignore_inlined_sources=False)
|
||||
|
||||
fn = innermost_fn(fn)
|
||||
|
@ -532,8 +532,8 @@ def _load_gb_type_to_gb_id_map() -> dict[str, Any]:
|
||||
)
|
||||
with open(registry_path) as f:
|
||||
registry = json.load(f)
|
||||
except Exception as e:
|
||||
log.error("Error accessing the registry file: %s", e)
|
||||
except Exception:
|
||||
log.exception("Error accessing the registry file")
|
||||
registry = {}
|
||||
|
||||
mapping = {}
|
||||
|
@ -269,7 +269,7 @@ class GraphRegionTracker:
|
||||
duplicates.append(node)
|
||||
self.node_to_duplicates[node] = duplicates
|
||||
except NodeHashException as e:
|
||||
log.debug("Unable to hash node %s with exception %s", node, e)
|
||||
log.debug("Unable to hash node %s with exception %s", node, e) # noqa: G200
|
||||
|
||||
def track_node_mutations(
|
||||
self,
|
||||
|
@ -1122,9 +1122,9 @@ class DiskDynamoCache(DiskDynamoStore):
|
||||
result = super().load_cache_entry(key)
|
||||
counters["dynamo_cache"]["dynamo_cache_hit"] += 1
|
||||
return result
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
counters["dynamo_cache"]["dynamo_cache_error"] += 1
|
||||
logger.warning("Failed to load package from path %s: %s", path, str(e))
|
||||
logger.warning("Failed to load package from path %s", path, exc_info=True)
|
||||
return None
|
||||
logger.info("No package found for %s", key)
|
||||
counters["dynamo_cache"]["dynamo_cache_miss"] += 1
|
||||
|
@ -203,7 +203,7 @@ class PrecompileContext:
|
||||
if result is not None:
|
||||
precompile_cache_entries[key] = result
|
||||
except Exception as e:
|
||||
logger.warning("Failed to create cache entry %s: %s", key, str(e))
|
||||
logger.warning("Failed to create cache entry %s", key, exc_info=True)
|
||||
|
||||
error = e
|
||||
data = json.dumps(
|
||||
|
@ -1041,7 +1041,7 @@ class BuiltinVariable(VariableTracker):
|
||||
except TypeError as e:
|
||||
has_constant_handler = obj.has_constant_handler(args, kwargs)
|
||||
if not has_constant_handler:
|
||||
log.warning(
|
||||
log.warning( # noqa: G200
|
||||
"incorrect arg count %s %s and no constant handler",
|
||||
self_handler,
|
||||
e,
|
||||
@ -1560,9 +1560,9 @@ class BuiltinVariable(VariableTracker):
|
||||
try:
|
||||
# Only supports certain function types
|
||||
user_func_variable = variables.UserFunctionVariable(bound_method)
|
||||
except AssertionError as e:
|
||||
except AssertionError:
|
||||
# Won't be able to do inline the str method, return to avoid graph break
|
||||
log.warning("Failed to create UserFunctionVariable: %s", e)
|
||||
log.warning("Failed to create UserFunctionVariable", exc_info=True)
|
||||
return
|
||||
|
||||
# Inline the user function
|
||||
|
@ -1183,7 +1183,7 @@ def speculate_subgraph(
|
||||
f"fall back to eager-mode PyTorch, which could lead to a slowdown."
|
||||
)
|
||||
log.info(msg)
|
||||
log.info(ex)
|
||||
log.info(ex) # noqa: G200
|
||||
raise ex
|
||||
|
||||
|
||||
|
@ -1221,7 +1221,7 @@ class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]):
|
||||
except Exception as e:
|
||||
cache_key = None
|
||||
counters["aot_autograd"]["autograd_cache_bypass"] += 1
|
||||
log.info("Bypassing autograd cache due to: %s", e)
|
||||
log.info("Bypassing autograd cache due to: %s", e) # noqa: G200
|
||||
cache_state = "bypass"
|
||||
cache_event_time = time.time_ns()
|
||||
cache_info["cache_bypass_reason"] = str(e)
|
||||
@ -1368,7 +1368,7 @@ class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]):
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
log.info("AOTAutograd cache unable to load compiled graph: %s", e)
|
||||
log.info("AOTAutograd cache unable to load compiled graph: %s", e) # noqa: G200
|
||||
if config.strict_autograd_cache:
|
||||
raise e
|
||||
if entry is not None:
|
||||
@ -1414,12 +1414,12 @@ class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]):
|
||||
counters["aot_autograd"]["autograd_cache_saved"] += 1
|
||||
except BypassAOTAutogradCache as e:
|
||||
counters["aot_autograd"]["autograd_cache_bypass"] += 1
|
||||
log.info("Bypassing autograd cache due to: %s", e)
|
||||
log.info("Bypassing autograd cache due to: %s", e) # noqa: G200
|
||||
if remote:
|
||||
log_cache_bypass("bypass_aot_autograd", str(e))
|
||||
return None
|
||||
except Exception as e:
|
||||
log.info("AOTAutograd cache unable to serialize compiled graph: %s", e)
|
||||
log.info("AOTAutograd cache unable to serialize compiled graph: %s", e) # noqa: G200
|
||||
if remote:
|
||||
log_cache_bypass(
|
||||
"bypass_aot_autograd", "Unable to serialize: " + str(e)
|
||||
|
@ -1516,7 +1516,7 @@ class FxGraphCache(GuardedCache[CompiledFxGraph]):
|
||||
)
|
||||
except BypassFxGraphCache as e:
|
||||
counters["inductor"]["fxgraph_cache_bypass"] += 1
|
||||
log.info("Bypassing FX Graph Cache because '%s'", e)
|
||||
log.info("Bypassing FX Graph Cache because '%s'", e) # noqa: G200
|
||||
if remote:
|
||||
log_cache_bypass("bypass_fx_graph", str(e))
|
||||
cache_info = {
|
||||
|
@ -2493,7 +2493,7 @@ class KernelTemplate:
|
||||
choices.append(self.generate(**kwargs))
|
||||
return None
|
||||
except NotImplementedError as e:
|
||||
log.info(
|
||||
log.info( # noqa: G200
|
||||
"Cannot Append Choice: %s. KernelTemplate type is %s",
|
||||
e,
|
||||
type(self),
|
||||
|
@ -22,8 +22,8 @@ def get_cuda_arch() -> Optional[str]:
|
||||
major, minor = torch.cuda.get_device_capability(0)
|
||||
return str(major * 10 + minor)
|
||||
return str(cuda_arch)
|
||||
except Exception as e:
|
||||
log.error("Error getting cuda arch: %s", e)
|
||||
except Exception:
|
||||
log.exception("Error getting cuda arch")
|
||||
return None
|
||||
|
||||
|
||||
@ -45,8 +45,8 @@ def get_cuda_version() -> Optional[str]:
|
||||
if cuda_version is None:
|
||||
cuda_version = torch.version.cuda
|
||||
return cuda_version
|
||||
except Exception as e:
|
||||
log.error("Error getting cuda version: %s", e)
|
||||
except Exception:
|
||||
log.exception("Error getting cuda version")
|
||||
return None
|
||||
|
||||
|
||||
|
@ -94,11 +94,11 @@ def maybe_fetch_ops() -> Optional[list[Any]]:
|
||||
assert isinstance(serialized_ops, list), (
|
||||
f"Expected serialized ops is a list, got {type(serialized_ops)}"
|
||||
)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
log.warning(
|
||||
"Failed to load CUTLASS config %s from local cache: %s",
|
||||
"Failed to load CUTLASS config %s from local cache",
|
||||
filename,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
serialized_ops = None
|
||||
elif config.is_fbcode():
|
||||
|
@ -53,8 +53,8 @@ def move_cutlass_compiled_cache() -> None:
|
||||
filename = os.path.basename(cutlass_cppgen.CACHE_FILE)
|
||||
shutil.move(cutlass_cppgen.CACHE_FILE, os.path.join(cache_dir(), filename))
|
||||
log.debug("Moved CUTLASS compiled cache file to %s", cache_dir())
|
||||
except OSError as e:
|
||||
log.warning("Failed to move CUTLASS compiled cache file: %s", e)
|
||||
except OSError:
|
||||
log.warning("Failed to move CUTLASS compiled cache file", exc_info=True)
|
||||
|
||||
|
||||
def _rename_cutlass_import(content: str, cutlass_modules: list[str]) -> str:
|
||||
@ -79,7 +79,7 @@ def try_import_cutlass() -> bool:
|
||||
import cutlass_cppgen # type: ignore[import-not-found] # noqa: F401
|
||||
import cutlass_library # type: ignore[import-not-found]
|
||||
except ImportError as e:
|
||||
log.warning(
|
||||
log.warning( # noqa: G200
|
||||
"Failed to import CUTLASS packages in fbcode: %s, ignoring the CUTLASS backend.",
|
||||
str(e),
|
||||
)
|
||||
@ -164,7 +164,7 @@ def try_import_cutlass() -> bool:
|
||||
|
||||
return True
|
||||
except ImportError as e:
|
||||
log.debug(
|
||||
log.debug( # noqa: G200
|
||||
"Failed to import CUTLASS packages: %s, ignoring the CUTLASS backend.",
|
||||
str(e),
|
||||
)
|
||||
|
@ -58,10 +58,10 @@ class CuteDSLTemplate(KernelTemplate):
|
||||
choices.append(self.generate(**kwargs))
|
||||
return None
|
||||
except NotImplementedError as e:
|
||||
log.debug("CuteDSL template choice generation failed: %s", e)
|
||||
log.debug("CuteDSL template choice generation failed: %s", e) # noqa: G200
|
||||
return e
|
||||
except Exception as e:
|
||||
log.debug("CuteDSL template choice generation error: %s", e)
|
||||
log.debug("CuteDSL template choice generation error: %s", e) # noqa: G200
|
||||
return NotImplementedError(f"CuteDSL template failed: {e}")
|
||||
|
||||
def generate(self, **kwargs: Any) -> ChoiceCaller:
|
||||
|
@ -510,7 +510,7 @@ class CKGemmTemplate(CKTemplate):
|
||||
torch.cuda.get_device_properties(X_meta.device).warp_size,
|
||||
)
|
||||
except Exception as e:
|
||||
log.debug(
|
||||
log.debug( # noqa: G200
|
||||
"Failed to prefetch_stages for %s with exception %s", op.name, e
|
||||
)
|
||||
# be conservative here and disable the op
|
||||
|
@ -5638,7 +5638,7 @@ class TritonScheduling(SIMDScheduling):
|
||||
except Exception as e:
|
||||
if config.triton.disallow_failing_autotune_kernels_TESTING_ONLY:
|
||||
raise
|
||||
log.debug(
|
||||
log.debug( # noqa: G200
|
||||
"Exception (%s) in compiling fused nodes %s",
|
||||
e,
|
||||
node_names,
|
||||
|
@ -204,7 +204,7 @@ def estimate_nccl_collective_runtime_nccl_estimator(snode) -> Optional[float]:
|
||||
torch.ops._c10d_functional.wait_tensor.default(w)
|
||||
except Exception as e:
|
||||
# NCCL estimator can fail
|
||||
log.info(e)
|
||||
log.info(e) # noqa: G200
|
||||
return None
|
||||
|
||||
est_time_us = time_estimator.estimated_time
|
||||
|
@ -445,7 +445,7 @@ class _SerializedFxCompile(FxCompile):
|
||||
# we can't cache (or serialize)
|
||||
FxGraphCache._check_for_hop(gm)
|
||||
except BypassFxGraphCache as e:
|
||||
log.debug("Skipping %s compile: %s", type(self), e)
|
||||
log.debug("Skipping %s compile: %s", type(self), e) # noqa: G200
|
||||
return None
|
||||
|
||||
context = torch._guards.TracingContext.try_get()
|
||||
|
@ -284,8 +284,8 @@ class SubprocPool:
|
||||
self.process.wait(300)
|
||||
if self.log_file:
|
||||
self.log_file.close()
|
||||
except OSError as e:
|
||||
log.warning("Ignored OSError in pool shutdown: %s", e)
|
||||
except OSError:
|
||||
log.warning("Ignored OSError in pool shutdown", exc_info=True)
|
||||
finally:
|
||||
with self.futures_lock:
|
||||
for future in self.pending_futures.values():
|
||||
|
@ -207,7 +207,7 @@ def numeric_check_if_enabled(
|
||||
precision=precision,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
logger.warning( # noqa: G200
|
||||
"Runtime numeric check failed in pre grad fx passes with error: %s", e
|
||||
)
|
||||
traceback.print_exc()
|
||||
|
@ -913,8 +913,8 @@ def reorder_for_peak_memory(
|
||||
try:
|
||||
validate_graph_acyclic(nodes)
|
||||
validate_unique_buffer_names(nodes, name_to_buf, name_to_freeable_input_buf)
|
||||
except RuntimeError as e:
|
||||
torch_log.error("Memory planning validation failed: %s", e)
|
||||
except RuntimeError:
|
||||
torch_log.exception("Memory planning validation failed")
|
||||
if not is_fbcode(): # TODO: remove after ensuring OSS side is safe
|
||||
raise
|
||||
|
||||
@ -942,8 +942,8 @@ def reorder_for_peak_memory(
|
||||
PeakMemoryResult(order, peak_memory, method.__name__)
|
||||
)
|
||||
torch_log.info("%s peak memory: %d", method.__name__, peak_memory)
|
||||
except Exception as e:
|
||||
torch_log.error("Failed to reorder for %s: %s", method.__name__, e)
|
||||
except Exception:
|
||||
torch_log.exception("Failed to reorder for %s", method.__name__)
|
||||
if not is_fbcode(): # TODO: remove after ensuring OSS side is safe
|
||||
raise
|
||||
|
||||
|
@ -238,7 +238,7 @@ class CoordescTuner:
|
||||
try:
|
||||
candidate_timing = self.call_func(func, candidate_config)
|
||||
except Exception as e:
|
||||
log.debug("Got exception %s", e)
|
||||
log.debug("Got exception %s", e) # noqa: G200
|
||||
return False, float("inf")
|
||||
|
||||
if self.has_improvement(best_timing, candidate_timing):
|
||||
|
@ -1618,7 +1618,7 @@ class StaticTritonCompileResult(CompileResult[StaticallyLaunchedCudaKernel]):
|
||||
result = check_can_launch()
|
||||
return result
|
||||
except CannotStaticallyLaunchKernel as e:
|
||||
log.info("Bypassing StaticallyLaunchedCudaKernel due to %s", str(e))
|
||||
log.info("Bypassing StaticallyLaunchedCudaKernel due to %s", str(e)) # noqa: G200
|
||||
if torch._inductor.config.strict_static_cuda_launcher:
|
||||
raise e
|
||||
return None
|
||||
@ -1997,11 +1997,11 @@ def end_graph(output_file):
|
||||
)
|
||||
file.write(bw_info_str + "\n")
|
||||
file.write(f"{summary_str}\n\n")
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
log.warning(
|
||||
"failed to write profile bandwidth result into %s: %s",
|
||||
"failed to write profile bandwidth result into %s",
|
||||
output_file,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
|
@ -896,11 +896,11 @@ class BaseSchedulerNode:
|
||||
except ValueError as e:
|
||||
# We don't know how to estimate runtime for this collective,
|
||||
# falling back to 0
|
||||
log.info(e)
|
||||
log.info(e) # noqa: G200
|
||||
return 0
|
||||
except TypeError as e:
|
||||
# this happens when the collective is not of type ir._CollectiveKernel
|
||||
log.info(e)
|
||||
log.info(e) # noqa: G200
|
||||
return 0
|
||||
|
||||
elif is_wait(self.node):
|
||||
@ -3366,7 +3366,7 @@ class Scheduler:
|
||||
future.result()
|
||||
except Exception as e:
|
||||
if fusion_log.isEnabledFor(logging.DEBUG):
|
||||
fusion_log.debug(
|
||||
fusion_log.debug( # noqa: G200
|
||||
"Exception in compiling %s: %s",
|
||||
"prologue" if not epilogue_fusion else "epilogue",
|
||||
str(e),
|
||||
@ -3442,7 +3442,7 @@ class Scheduler:
|
||||
# triton will unpredictably error with valid prologue fusions
|
||||
except Exception as e:
|
||||
if fusion_log.isEnabledFor(logging.DEBUG):
|
||||
fusion_log.debug(
|
||||
fusion_log.debug( # noqa: G200
|
||||
"Exception in compiling %s: %s",
|
||||
"prologue" if not epilogue_fusion else "epilogue",
|
||||
str(e),
|
||||
|
@ -1702,7 +1702,7 @@ class TritonTemplate(KernelTemplate):
|
||||
choices.append(choice)
|
||||
return None
|
||||
except NotImplementedError as e:
|
||||
log.info(
|
||||
log.info( # noqa: G200
|
||||
"Cannot Append Choice: %s. KernelTemplate type is %s",
|
||||
e,
|
||||
type(self),
|
||||
@ -3223,17 +3223,16 @@ class AlgorithmSelectorCache(PersistentCache):
|
||||
for choice in choices:
|
||||
try:
|
||||
timing = cls.benchmark_choice(choice, autotune_args)
|
||||
except CUDACompileError as e:
|
||||
except CUDACompileError:
|
||||
from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
|
||||
|
||||
if not isinstance(choice, CUDATemplateCaller):
|
||||
log.error(
|
||||
"CUDA compilation error during autotuning: \n%s. \nIgnoring this choice.",
|
||||
e,
|
||||
log.exception(
|
||||
"CUDA compilation error during autotuning. \nIgnoring this choice."
|
||||
)
|
||||
timing = float("inf")
|
||||
except NotImplementedError as e:
|
||||
log.warning("Not yet implemented: %s", e)
|
||||
except NotImplementedError:
|
||||
log.warning("Not yet implemented", exc_info=True)
|
||||
timing = float("inf")
|
||||
except RuntimeError as e:
|
||||
from torch._inductor.codegen.cuda.cuda_kernel import CUDATemplateCaller
|
||||
@ -3266,7 +3265,7 @@ class AlgorithmSelectorCache(PersistentCache):
|
||||
from triton.runtime.autotuner import OutOfResources
|
||||
|
||||
if isinstance(e, OutOfResources):
|
||||
log.warning(e)
|
||||
log.warning(e) # noqa: G200
|
||||
timing = float("inf")
|
||||
else:
|
||||
raise e
|
||||
|
@ -224,11 +224,11 @@ class TritonBundler:
|
||||
# Make sure the cubin path exists and is valid
|
||||
for compile_result in result.kernel.compile_results:
|
||||
compile_result.reload_cubin_path()
|
||||
except RuntimeError as e:
|
||||
except RuntimeError:
|
||||
log.warning(
|
||||
"Failed to reload cubin file statically launchable autotuner %s: %s",
|
||||
"Failed to reload cubin file statically launchable autotuner %s",
|
||||
result.kernel_name,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
continue
|
||||
# We make a future instead of returning the kernel here so that
|
||||
|
@ -21,7 +21,7 @@ class FakeScriptObject:
|
||||
with _disable_current_modes():
|
||||
self.real_obj = copy.deepcopy(x)
|
||||
except RuntimeError as e:
|
||||
log.warning(
|
||||
log.warning( # noqa: G200
|
||||
"Unable to deepcopy the custom object %s due to %s. "
|
||||
"Defaulting to the user given object. This might be "
|
||||
"dangerous as side effects may be directly applied "
|
||||
|
@ -2568,7 +2568,7 @@ class FakeTensorMode(TorchDispatchMode):
|
||||
# we shouldn't broadly catch all errors here;
|
||||
# some come from real-kernel mutation/aliasing checks we want to run.
|
||||
# add more exception types as needed.
|
||||
log.debug(
|
||||
log.debug( # noqa: G200
|
||||
"real-tensor fallback failed for %s: %s; silently ignoring",
|
||||
func,
|
||||
exc,
|
||||
|
@ -224,7 +224,7 @@ class CheckpointProcess:
|
||||
)
|
||||
)
|
||||
parent_pipe.close()
|
||||
logger.error("Subprocess terminated due to exception: %s", e)
|
||||
logger.exception("Subprocess terminated due to exception")
|
||||
|
||||
def _send(self, request_type: RequestType, payload: dict[str, Any]) -> None:
|
||||
try:
|
||||
@ -238,8 +238,8 @@ class CheckpointProcess:
|
||||
)
|
||||
except OSError as e:
|
||||
error_msg = "Child process terminated unexpectedly"
|
||||
logger.error(
|
||||
"Communication failed during %s request: %s", request_type.value, e
|
||||
logger.exception(
|
||||
"Communication failed during %s request", request_type.value
|
||||
)
|
||||
raise RuntimeError(error_msg) from e
|
||||
|
||||
@ -354,10 +354,8 @@ class CheckpointProcess:
|
||||
)
|
||||
self.process.processes[0].kill()
|
||||
logger.info("Subprocess killed forcefully")
|
||||
except ProcessExitedException as e:
|
||||
logger.error(
|
||||
"ProcessExitedException during subprocess termination: %s", e
|
||||
)
|
||||
except ProcessExitedException:
|
||||
logger.exception("ProcessExitedException during subprocess termination")
|
||||
raise
|
||||
|
||||
logger.debug("CheckpointProcess closed successfully")
|
||||
|
@ -972,7 +972,7 @@ def _store_based_barrier(
|
||||
except RuntimeError as e:
|
||||
worker_count = store.add(store_key, 0)
|
||||
# Print status periodically to keep track.
|
||||
logger.debug(
|
||||
logger.debug( # noqa: G200
|
||||
"Waiting in store based barrier to initialize process group for %s seconds"
|
||||
"rank: %s, key: %s (world_size=%s, num_workers_joined=%s, timeout=%s error=%s)",
|
||||
time.time() - start,
|
||||
|
@ -721,7 +721,7 @@ class SimpleElasticAgent(ElasticAgent):
|
||||
self._record_worker_events(result)
|
||||
return result
|
||||
except RendezvousGracefulExitError as e:
|
||||
logger.info("Rendezvous gracefully exited: %s", e)
|
||||
logger.info("Rendezvous gracefully exited: %s", e) # noqa: G200
|
||||
except SignalException as e:
|
||||
logger.warning("Received %s death signal, shutting down workers", e.sigval)
|
||||
self._shutdown(e.sigval)
|
||||
|
@ -489,11 +489,13 @@ class PContext(abc.ABC):
|
||||
sig = getattr(signal, sig_name.strip())
|
||||
signal.signal(sig, _terminate_process_handler)
|
||||
logger.info("Registered signal handler for %s", sig_name)
|
||||
except (AttributeError, ValueError) as e:
|
||||
except (AttributeError, ValueError):
|
||||
logger.warning(
|
||||
"Failed to register signal handler for %s: %s", sig_name, e
|
||||
"Failed to register signal handler for %s",
|
||||
sig_name,
|
||||
exc_info=True,
|
||||
)
|
||||
except RuntimeError as e:
|
||||
except RuntimeError:
|
||||
if IS_WINDOWS and sig_name.strip() in [
|
||||
"SIGHUP",
|
||||
"SIGQUIT",
|
||||
@ -505,7 +507,9 @@ class PContext(abc.ABC):
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Failed to register signal handler for %s: %s", sig_name, e
|
||||
"Failed to register signal handler for %s",
|
||||
sig_name,
|
||||
exc_info=True,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
|
@ -142,12 +142,11 @@ class TailLog:
|
||||
try:
|
||||
f.result()
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"error in log tailor for %s%s. %s: %s",
|
||||
logger.exception(
|
||||
"error in log tailor for %s%s. %s",
|
||||
self._name,
|
||||
local_rank,
|
||||
e.__class__.__qualname__,
|
||||
e,
|
||||
)
|
||||
|
||||
if self._threadpool:
|
||||
|
@ -208,8 +208,8 @@ class EtcdRendezvousHandler(RendezvousHandler):
|
||||
try:
|
||||
self.set_closed()
|
||||
return True
|
||||
except BaseException as e: # noqa: B036
|
||||
logger.warning("Shutdown failed. Error occurred: %s", str(e))
|
||||
except BaseException: # noqa: B036
|
||||
logger.warning("Shutdown failed", exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
@ -333,7 +333,7 @@ class EtcdRendezvous:
|
||||
# to avoid spamming etcd
|
||||
# FIXME: there are a few things that fall under this like
|
||||
# etcd.EtcdKeyNotFound, etc, which could be handled more explicitly.
|
||||
logger.info("Rendezvous attempt failed, will retry. Reason: %s", e)
|
||||
logger.info("Rendezvous attempt failed, will retry. Reason: %s", e) # noqa: G200
|
||||
time.sleep(1)
|
||||
|
||||
def init_phase(self):
|
||||
|
@ -176,7 +176,7 @@ class EtcdServer:
|
||||
except Exception as e:
|
||||
curr_retries += 1
|
||||
stop_etcd(self._etcd_proc)
|
||||
logger.warning(
|
||||
logger.warning( # noqa: G200
|
||||
"Failed to start etcd server, got error: %s, retrying", str(e)
|
||||
)
|
||||
if curr_retries >= num_retries:
|
||||
|
@ -1734,7 +1734,7 @@ class PipelineScheduleMulti(_PipelineSchedule):
|
||||
# do the communication
|
||||
_wait_batch_p2p(_batch_p2p(ops))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
logger.error( # noqa: G200
|
||||
"[Rank %s] pipeline schedule %s caught the following exception '%s' \
|
||||
at time_step %s when running action %s",
|
||||
self.rank,
|
||||
|
@ -295,8 +295,8 @@ def _barrier(worker_names):
|
||||
"""
|
||||
try:
|
||||
_all_gather(None, set(worker_names))
|
||||
except RuntimeError as ex:
|
||||
logger.error("Failed to complete barrier, got error %s", ex)
|
||||
except RuntimeError:
|
||||
logger.exception("Failed to complete barrier")
|
||||
|
||||
|
||||
@_require_initialized
|
||||
@ -311,9 +311,7 @@ def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT):
|
||||
try:
|
||||
_all_gather(None, timeout=timeout)
|
||||
except RuntimeError as ex:
|
||||
logger.error(
|
||||
"Failed to respond to 'Shutdown Proceed' in time, got error %s", ex
|
||||
)
|
||||
logger.exception("Failed to respond to 'Shutdown Proceed' in time")
|
||||
raise ex
|
||||
|
||||
|
||||
|
@ -448,8 +448,8 @@ def load(
|
||||
f,
|
||||
expected_opset_version=expected_opset_version,
|
||||
)
|
||||
except RuntimeError as e:
|
||||
log.warning("Ran into the following error when deserializing: %s", e)
|
||||
except RuntimeError:
|
||||
log.warning("Ran into the following error when deserializing", exc_info=True)
|
||||
pt2_contents = PT2ArchiveContents({}, {}, {})
|
||||
|
||||
if len(pt2_contents.exported_programs) > 0 or len(pt2_contents.extra_files) > 0:
|
||||
|
@ -83,8 +83,8 @@ def is_pt2_package(serialized_model: Union[bytes, str]) -> bool:
|
||||
archive_format_path = f"{root_folder}/{ARCHIVE_FORMAT_PATH}"
|
||||
if archive_format_path in zip_reader.namelist():
|
||||
return zip_reader.read(archive_format_path) == b"pt2"
|
||||
except Exception as ex:
|
||||
logger.info("Model is not a PT2 package: %s", str(ex))
|
||||
except Exception:
|
||||
logger.info("Model is not a PT2 package")
|
||||
return False
|
||||
|
||||
|
||||
|
@ -3209,8 +3209,8 @@ class DimConstraints:
|
||||
self._dynamic_results.add(self._dcp.doprint(arg))
|
||||
else:
|
||||
self._dynamic_results.add(self._dcp.doprint(solution))
|
||||
except (NotImplementedError, AssertionError) as e:
|
||||
log.warning("Failed to reduce inequalities: %s", e)
|
||||
except (NotImplementedError, AssertionError):
|
||||
log.warning("Failed to reduce inequalities", exc_info=True)
|
||||
for expr2 in exprs:
|
||||
self._dynamic_results.add(self._dcp.doprint(expr2))
|
||||
|
||||
|
@ -83,7 +83,7 @@ class OnnxDecompMeta:
|
||||
# When the function is targeting an HOP, for example, it will accept
|
||||
# functions as arguments and fail to generate an ONNX signature.
|
||||
# In this case we set signature to None and dispatch to this function always.
|
||||
logger.warning(
|
||||
logger.warning( # noqa: G200
|
||||
"Failed to infer the signature for function '%s' because '%s'"
|
||||
"All nodes targeting `%s` will be dispatched to this function",
|
||||
self.onnx_function,
|
||||
|
@ -317,12 +317,9 @@ class _VerificationInterpreter(torch.fx.Interpreter):
|
||||
return result
|
||||
try:
|
||||
(onnx_result,) = self._onnx_program.compute_values([node_name], self._args)
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to compute value for node %s: %s",
|
||||
node_name,
|
||||
e,
|
||||
exc_info=True,
|
||||
"Failed to compute value for node %s", node_name, exc_info=True
|
||||
)
|
||||
return result
|
||||
info = VerificationInfo.from_tensors(
|
||||
|
@ -875,7 +875,7 @@ class MultiProcessTestCase(TestCase):
|
||||
try:
|
||||
getattr(self, test_name)()
|
||||
except unittest.SkipTest as se:
|
||||
logger.info(
|
||||
logger.info( # noqa: G200
|
||||
"Process %s skipping test %s for following reason: %s",
|
||||
self.rank,
|
||||
test_name,
|
||||
@ -917,11 +917,10 @@ class MultiProcessTestCase(TestCase):
|
||||
try:
|
||||
pipe.send(MultiProcessTestCase.Event.GET_TRACEBACK)
|
||||
pipes.append((i, pipe))
|
||||
except ConnectionError as e:
|
||||
logger.error(
|
||||
"Encountered error while trying to get traceback for process %s: %s",
|
||||
except ConnectionError:
|
||||
logger.exception(
|
||||
"Encountered error while trying to get traceback for process %s",
|
||||
i,
|
||||
e,
|
||||
)
|
||||
|
||||
# Wait for results.
|
||||
@ -944,11 +943,10 @@ class MultiProcessTestCase(TestCase):
|
||||
logger.error(
|
||||
"Could not retrieve traceback for timed out process: %s", rank
|
||||
)
|
||||
except ConnectionError as e:
|
||||
logger.error(
|
||||
"Encountered error while trying to get traceback for process %s: %s",
|
||||
except ConnectionError:
|
||||
logger.exception(
|
||||
"Encountered error while trying to get traceback for process %s",
|
||||
rank,
|
||||
e,
|
||||
)
|
||||
|
||||
def _join_processes(self, fn) -> None:
|
||||
|
Reference in New Issue
Block a user