[1/N] Fix ruff warnings (#164333)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164333
Approved by: https://github.com/albanD
Author: Yuanyuan Chen
Date: 2025-10-01 16:48:29 +00:00
Committed by: PyTorch MergeBot
Parent: e419dc6d08
Commit: f7ab8a2710
26 changed files with 32 additions and 63 deletions

View File

@ -734,7 +734,7 @@ def _allow_inflight_collective_as_graph_input() -> bool: ...
def _unregister_all_process_groups() -> None: ...
def _unregister_process_group(group_name: str) -> None: ...
# Initializes the device state in CUmodule so that its able to perform NVSHMEM
# Initializes the device state in CUmodule so that it's able to perform NVSHMEM
# operations. CUmodule is a pointer to a CUDA module, carried by a int64 in
# Python. At C++ interface, it is converted to a uintptr_t.
def _nvshmemx_cumodule_init(module: int) -> None: ...

View File

@ -392,7 +392,7 @@ class GuardManagerWrapper:
-----------------------------------------------------------------------
A ``tag safe root`` is a tag safe node whose parent is not tag safe.
These boundary nodes mark the points where guard evaluation can safely
prune traversal: if a tag-safe roots dictionary tag matches, the entire
prune traversal: if a tag-safe root's dictionary tag matches, the entire
subtree beneath it is skipped.
One strong requirement for tag safe root is for the guarded object to
@ -544,12 +544,12 @@ class GuardManagerWrapper:
and node.get_source().endswith(dunder_attrs_assumed_constants)
and config.assume_dunder_attributes_remain_unchanged
):
# We trust tuples obtained from a functions __closure__ or
# We trust tuples obtained from a function's __closure__ or
# __defaults__. Any *other* tuple-valued attribute can be
# silently replaced—for example:
#
# foo.bar = (1, 2) # original
# foo.bar = (3, 4) # rebinding that our dict-tag optimisation wont see
# foo.bar = (3, 4) # rebinding that our dict-tag optimisation won't see
#
# Therefore only tuples from __closure__ / __defaults__ participate in the
# recursive-dict-tag optimization; all others are ignored.
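As an aside for readers outside dynamo: the trust placed in `__closure__` and `__defaults__` comes from the fact that both are tuple-valued attributes created when the `def` statement executes, whereas any other attribute lives in the function's `__dict__` and can be rebound at any time. A minimal illustration (the names `make_adder` and `f` are hypothetical, not part of this patch):

```python
def make_adder(n):
    def add(x, y=10):
        return x + y + n
    return add

f = make_adder(5)

# Created at definition time, both tuple-valued:
print(f.__defaults__)       # (10,)
print(type(f.__closure__))  # <class 'tuple'> (one cell holding n)

# Any other attribute is an ordinary __dict__ entry and can be
# silently rebound later, which is the case the comment warns about:
f.bar = (1, 2)
f.bar = (3, 4)
print(f.bar)                # (3, 4)
```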
@ -3870,13 +3870,13 @@ class CheckFunctionManager:
)
# Note - On Lambda guarding of object aliasing
# We previously installed objectaliasing guards as relational guards,
# but that undermined the recursivedict guard optimization: placing the
# We previously installed object-aliasing guards as relational guards,
# but that undermined the recursive-dict guard optimization: placing the
# aliasing guard at a leaf prevented the parent dict node from
# qualifying as a recursivedict guard root. Because aliasing guards are
# qualifying as a recursive-dict guard root. Because aliasing guards are
# rare, we now emit them as epilogue guards via a small Python lambda.
# This repeats the access in Python—adding a bit of work—but the
# overhead is outweighed by the gains from enabling recursivedict guard
# overhead is outweighed by the gains from enabling recursive-dict guard
# optimization.
if (
config.use_lamba_guard_for_object_aliasing

View File

@ -104,7 +104,7 @@ CO_VARARGS = 0x04
CO_VARKEYWORDS = 0x08
# Modulelevel cache keyed by the function object
# Module-level cache keyed by the function object
_spec_cache = WeakKeyDictionary()
@ -133,7 +133,7 @@ class FunctionSpec:
self.defaults = func.__defaults__ or ()
self.kwdefaults = func.__kwdefaults__ or {}
# Map positionaldefault names → their index in self.defaults
# Map positional-default names → their index in self.defaults
self.pos_default_map = dict(
zip(self.all_pos_names[-len(self.defaults) :], range(len(self.defaults)))
)
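For context on what `pos_default_map` holds, here is a rough standalone sketch; reconstructing `all_pos_names` from the code object is an assumption made for illustration and is not necessarily how `FunctionSpec` derives it:

```python
def f(a, b, c=3, d=4):
    pass

# Positional parameter names in order (illustrative reconstruction):
all_pos_names = f.__code__.co_varnames[: f.__code__.co_argcount]  # ('a', 'b', 'c', 'd')
defaults = f.__defaults__ or ()                                    # (3, 4)

# The last len(defaults) positional names are the defaulted ones,
# so each maps to its index within __defaults__:
pos_default_map = dict(zip(all_pos_names[-len(defaults):], range(len(defaults))))
print(pos_default_map)  # {'c': 0, 'd': 1}
```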
@ -879,7 +879,7 @@ class LocalGeneratorObjectVariable(VariableTracker):
retval = self.next_variable(tx)
# The exception raised before is still active. We need to check the exception
# table one more time to find the next target. But why? Lets walk
# table one more time to find the next target. But why? Let's walk
# through an example and its generated bytecode: https://godbolt.org/z/ebdTbMv8M
#
# z = 0
@ -1075,7 +1075,7 @@ class UserMethodVariable(UserFunctionVariable):
# One way is to simplly use `__func__` to unwrap it.
#
# For recursive dict-tag optimizations, it can be faster to fetch the
# function directly from `cls.__dict__`; thats why we pass on
# function directly from `cls.__dict__`; that's why we pass on
# `source_fn`. Whenever it is possible to access the function from
# cls.__dict__, we pass that on to `source_fn`. Because bind_args
# operates on the unbound function, most guards should target
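The two access paths the comment contrasts can be seen on a toy class (hypothetical names, not from the patch):

```python
class C:
    def m(self):
        return 42

obj = C()
bound = obj.m

# Unwrapping the bound method gives the same object that cls.__dict__ holds:
assert bound.__func__ is C.__dict__["m"]
```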

View File

@ -69,12 +69,12 @@ def dp_knapsack(
# Quantize the memory weights
quantized_memory = torch.tensor(
[int(round(m * S)) for m in memory], dtype=torch.long, device="cpu"
[round(m * S) for m in memory], dtype=torch.long, device="cpu"
)
runtimes = torch.tensor(runtime, dtype=torch.float32, device="cpu")
# Quantized pseudopolynomial DP for 0-1 Knapsack
quantized_max_memory = int(round(max_memory * S))
quantized_max_memory = round(max_memory * S)
n = len(memory)
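This hunk, like several below (`_vector_str`, `ElasticDistributedSampler`, `interpolate`, `sparse_`, `random_split`, `make_sprite`, and the int32 bounds in the quantization helpers), drops an `int(...)` cast whose argument is already an `int`: in Python 3, `round`, `math.floor`, and `math.ceil` return `int` for ordinary numeric scalars, and integer expressions such as `250 * 1024 * 1024` or `-(2**31)` are `int` to begin with. A quick sanity check, plain Python rather than patch code:

```python
import math

print(type(round(2.5 * 10)))      # <class 'int'>
print(type(math.floor(7 / 2)))    # <class 'int'>
print(type(math.ceil(0.3 * 12)))  # <class 'int'>
print(type(-(2**31)))             # <class 'int'>

# round() with an explicit ndigits argument keeps the input type;
# that form is not touched by this commit:
print(type(round(2.567, 2)))      # <class 'float'>
```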

View File

@ -4197,8 +4197,6 @@ class CppKernelProxy(CppKernel):
to_type_node, lambda n: n is not to_type_node
)
metrics.cpp_to_dtype_count += 1
else:
pass
def eliminate_to_dtype(sub_graph: torch.fx.Graph):
def _eliminate_duplicate_to_node(sub_graph: torch.fx.Graph):
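This hunk and the matching ones further down (`_get_os_related_cpp_definitions`, `register_onednn_fusion_ops`, `add_loggers_to_model`, the `Refine` pass, `_set_shape_type`, and a distributed test) delete an empty `else: pass` branch, which changes nothing about control flow. A schematic before/after with placeholder names:

```python
# Before: the else branch is a no-op.
def before(cond, work):
    if cond:
        work()
    else:
        pass

# After: behaviourally identical, one branch shorter.
def after(cond, work):
    if cond:
        work()
```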

View File

@ -813,8 +813,6 @@ def _get_os_related_cpp_definitions(cpp_compiler: str) -> list[str]:
# On Windows, we need disable min/max macro to avoid C2589 error, as PyTorch CMake:
# https://github.com/pytorch/pytorch/blob/9a41570199155eee92ebd28452a556075e34e1b4/CMakeLists.txt#L1118-L1119
os_definitions.append("NOMINMAX")
else:
pass
return os_definitions

View File

@ -1348,5 +1348,3 @@ def register_onednn_fusion_ops():
return result
add_needs_realized_inputs(cpu_needs_realized_inputs)
else:
pass

View File

@ -157,8 +157,7 @@ def _is_op_registered_to_fake_rule(op):
def _deregister_op_impl(op):
if op in op_implementations_dict:
del op_implementations_dict[op]
op_implementations_dict.pop(op, None)
for check, impl in op_implementations_checks:
if check is op:
op_implementations_checks.remove((check, impl))
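Replacing the membership check plus `del` with `dict.pop(key, None)` (the same rewrite is applied to `kwargs` in the foreach OpInfo sampler below) keeps the behaviour while doing a single lookup; for example, on a throwaway dict:

```python
registry = {"a": 1}

# Old pattern: membership test, then a separate deletion.
if "a" in registry:
    del registry["a"]

# New pattern: one call, and a silent no-op when the key is absent.
registry.pop("a", None)   # key already removed above, nothing happens
registry.pop("b", None)   # missing keys are fine too
print(registry)           # {}
```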

View File

@ -247,7 +247,7 @@ def _vector_str(self, indent, summarize, formatter1, formatter2=None):
element_length += formatter2.width() + 1
elements_per_line = max(
1, int(math.floor((PRINT_OPTS.linewidth - indent) / (element_length)))
1, math.floor((PRINT_OPTS.linewidth - indent) / (element_length))
)
def _val_formatter(val, formatter1=formatter1, formatter2=formatter2):

View File

@ -305,7 +305,7 @@ def deprecated():
"""
def decorator(func: Callable[_P, _T]) -> Callable[_P, _T]:
# Validate naming convention single leading underscore, not dunder
# Validate naming convention - single leading underscore, not dunder
if not (func.__name__.startswith("_")):
raise ValueError(
"@deprecate must decorate a function whose name "

View File

@ -198,7 +198,7 @@ def _quantize_weight_decomposed(
_DTYPE_TO_QVALUE_BOUNDS: dict[torch.dtype, tuple[int, int]] = {
torch.uint8: (0, 255),
torch.int8: (-128, 127),
torch.int32: (int(-(2**31)), int(2**31 - 1)),
torch.int32: ((-(2**31)), (2**31 - 1)),
}
# TODO: add an util function for converting qdtype to dtype
@ -261,7 +261,7 @@ def _dequantize_weight_decomposed(
_DTYPE_TO_QVALUE_BOUNDS: dict[torch.dtype, tuple[int, int]] = {
torch.uint8: (0, 255),
torch.int8: (-128, 127),
torch.int32: (int(-(2**31)), int(2**31 - 1)),
torch.int32: ((-(2**31)), (2**31 - 1)),
}
# TODO: add an util function for converting qdtype to dtype
_QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE = {

View File

@ -164,8 +164,6 @@ def add_loggers_to_model(
index_of_arg=node_arg_idx,
fqn=fqn,
)
else:
pass
# ensure env is populated with base node
# Note: runs for both inputs and outputs

View File

@ -142,7 +142,7 @@ class WeightNormSparsifier(BaseSparsifier):
data = data.repeat(1, values_per_block, 1)
threshold_idx = int(round(sparsity_level * num_blocks))
threshold_idx = round(sparsity_level * num_blocks)
threshold_idx = max(0, min(num_blocks - 1, threshold_idx)) # Sanity check
_, sorted_idx = torch.topk(data, k=threshold_idx, dim=2, largest=False)

View File

@ -62,8 +62,8 @@ class ElasticDistributedSampler(DistributedSampler[T]):
self.start_index = start_index
sized_dataset = cast(Sized, self.dataset)
self.num_samples = int(
math.ceil(float(len(sized_dataset) - self.start_index) / self.num_replicas)
self.num_samples = math.ceil(
float(len(sized_dataset) - self.start_index) / self.num_replicas
)
self.total_size = self.num_samples * self.num_replicas

View File

@ -56,7 +56,7 @@ try:
except ImportError:
_TORCHDISTX_AVAIL = False
PARAM_BROADCAST_BUCKET_SIZE = int(250 * 1024 * 1024)
PARAM_BROADCAST_BUCKET_SIZE = 250 * 1024 * 1024
FSDP_SYNCED = "_fsdp_synced"
# Specification of process groups for hybrid sharding strategies.
HybridShardProcessGroupType = tuple[dist.ProcessGroup, dist.ProcessGroup]

View File

@ -942,15 +942,11 @@ class Refine:
if n.op == "call_function":
if n.target in _REFINEMENT_RULES:
self.constraints += _REFINEMENT_RULES[n.target](n)
else:
pass
if n.op == "call_module":
module_instance = self.traced.get_submodule(n.target)
if type(module_instance) in _REFINEMENT_RULES:
self.constraints += _REFINEMENT_RULES[type(module_instance)](n)
else:
pass
if n.op == "output":
@ -960,23 +956,16 @@ class Refine:
n.type = torch.fx.node.map_arg(n.args[0], get_node_type)
return n.type
else:
pass
def infer_symbolic_relations(self, n: Node):
n.type = self.convert_to_sympy_symbols(n.type)
if n.op == "call_function":
if n.target in _RULES:
return _RULES[n.target](n)
else:
pass
if n.op == "call_module":
module_instance = self.traced.get_submodule(n.target)
if type(module_instance) in _RULES:
return _RULES[type(module_instance)](n, module_instance)
else:
pass
if n.op == "output":
@ -986,9 +975,6 @@ class Refine:
n.type = torch.fx.node.map_arg(n.args[0], get_node_type)
return n.type
else:
pass
def get_parameter(traced, target: str):
"""

View File

@ -4718,7 +4718,7 @@ def interpolate( # noqa: F811
]
elif torch.jit.is_scripting():
output_size = [
int(math.floor(float(input.size(i + 2)) * scale_factors[i]))
math.floor(float(input.size(i + 2)) * scale_factors[i])
for i in range(dim)
]
else:

View File

@ -705,7 +705,7 @@ def sparse_(
raise ValueError("Only tensors with 2 dimensions are supported")
rows, cols = tensor.shape
num_zeros = int(math.ceil(sparsity * rows))
num_zeros = math.ceil(sparsity * rows)
with torch.no_grad():
tensor.normal_(0, std, generator=generator)

View File

@ -819,7 +819,7 @@ class DistributedDataParallel(Module, Joinable):
"Run a dummy forward pass to correctly initialize the modules",
)
# used for intra-node param sync and inter-node sync as well
self.broadcast_bucket_size = int(250 * 1024 * 1024)
self.broadcast_bucket_size = 250 * 1024 * 1024
# reduction bucket size
if bucket_cap_mb is None:

View File

@ -270,8 +270,6 @@ def _set_shape_type(
elif isinstance(meta_val, (float, torch.SymFloat)):
value.dtype = ir.DataType.FLOAT
value.shape = ir.Shape([])
else:
pass
def _get_qualified_module_name(cls: Any) -> str:

View File

@ -1365,8 +1365,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
"get_pool_ceil_padding", "input size not accessible", input
)
ceiled_output_dim = [
int(math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])))
+ 1
math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])) + 1
for i in range(0, len(padding))
]
# ensure last pooling starts inside
@ -4536,7 +4535,7 @@ def lstm_cell(g: jit_utils.GraphContext, self, hidden, w_ih, w_hh, b_ih, b_hh):
weight = (
(w_ih, w_hh, b_ih, b_hh) if symbolic_helper._is_tensor(b_ih) else (w_ih, w_hh)
)
has_biases = True if symbolic_helper._is_tensor(b_ih) else False
has_biases = bool(symbolic_helper._is_tensor(b_ih))
_, h_outs, c_outs = _generic_rnn(
g,
"LSTM",

View File

@ -9710,8 +9710,7 @@ class foreach_pointwise_sample_func(foreach_inputs_sample_func):
sample_inputs_foreach(None, device, dtype, NUM_SIZE0_TENSORS, zero_size=True, **_foreach_inputs_kwargs)
for _ in range(2)
]
if "scalars" in kwargs:
del kwargs["scalars"]
kwargs.pop("scalars", None)
kwargs.update(self._sample_kwargs(opinfo, args[-1], ForeachRightmostArgType.TensorList, dtype))
yield ForeachSampleInput(input, *args, **kwargs)

View File

@ -1955,7 +1955,7 @@ optim_db: list[OptimizerInfo] = [
supports_complex=False,
skips=(
# Note on numerical differences: `compile` applies different matmul tuning,
# which leads to deviations compared to eager mode. In the NewtonSchulz
# which leads to deviations compared to eager mode. In the Newton-Schulz
# iteration for orthogonalization, computations are done in bfloat16, further
# amplifying these numerical differences.
DecorateInfo(

View File

@ -858,8 +858,6 @@ class DistributedTest:
with exception_ctx:
dist.barrier(group_id)
self.assertGreaterAlmostEqual(time.time(), expected_time, delta=0.1)
else:
pass
@skip_but_pass_in_sandcastle_if(
BACKEND != "gloo", "Only gloo backend supports timeouts"

View File

@ -454,9 +454,7 @@ def random_split(
for i, frac in enumerate(lengths):
if frac < 0 or frac > 1:
raise ValueError(f"Fraction at index {i} is not between 0 and 1")
n_items_in_split = int(
math.floor(len(dataset) * frac) # type: ignore[arg-type]
)
n_items_in_split = math.floor(len(dataset) * frac) # type: ignore[arg-type]
subset_lengths.append(n_items_in_split)
remainder = len(dataset) - sum(subset_lengths) # type: ignore[arg-type]
# add 1 to all the lengths in round-robin fashion until the remainder is 0

View File

@ -42,7 +42,7 @@ def make_sprite(label_img, save_path):
# this ensures the sprite image has correct dimension as described in
# https://www.tensorflow.org/get_started/embedding_viz
nrow = int(math.ceil((label_img.size(0)) ** 0.5))
nrow = math.ceil((label_img.size(0)) ** 0.5)
arranged_img_CHW = make_grid(make_np(label_img), ncols=nrow)
# augment images so that #images equals nrow*nrow