diff --git a/torch/_C/_distributed_c10d.pyi b/torch/_C/_distributed_c10d.pyi
index ad3d8e3abf24..281dceed8f1a 100644
--- a/torch/_C/_distributed_c10d.pyi
+++ b/torch/_C/_distributed_c10d.pyi
@@ -734,7 +734,7 @@ def _allow_inflight_collective_as_graph_input() -> bool: ...
 def _unregister_all_process_groups() -> None: ...
 def _unregister_process_group(group_name: str) -> None: ...

-# Initializes the device state in CUmodule so that it’s able to perform NVSHMEM
-# operations. CUmodule is a pointer to a CUDA module, carried by a int64 in
-# Python. At C++ interface, it is converted to a uintptr_t.
+# Initializes the device state in CUmodule so that it's able to perform NVSHMEM
+# operations. CUmodule is a pointer to a CUDA module, carried by an int64 in
+# Python. At the C++ interface, it is converted to a uintptr_t.
 def _nvshmemx_cumodule_init(module: int) -> None: ...
diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py
index 41e47a327ff8..0517e8a32e34 100644
--- a/torch/_dynamo/guards.py
+++ b/torch/_dynamo/guards.py
@@ -392,7 +392,7 @@ class GuardManagerWrapper:
     -----------------------------------------------------------------------
     A ``tag safe root`` is a tag safe node whose parent is not tag safe.
     These boundary nodes mark the points where guard evaluation can safely
-    prune traversal: if a tag-safe root’s dictionary tag matches, the entire
+    prune traversal: if a tag-safe root's dictionary tag matches, the entire
     subtree beneath it is skipped.

     One strong requirement for tag safe root is for the guarded object to
@@ -544,12 +544,12 @@ class GuardManagerWrapper:
                 and node.get_source().endswith(dunder_attrs_assumed_constants)
                 and config.assume_dunder_attributes_remain_unchanged
             ):
-                # We trust tuples obtained from a function’s __closure__ or
+                # We trust tuples obtained from a function's __closure__ or
                 # __defaults__. Any *other* tuple-valued attribute can be
                 # silently replaced—for example:
                 #
                 #     foo.bar = (1, 2)  # original
-                #     foo.bar = (3, 4)  # rebinding that our dict-tag optimisation won’t see
+                #     foo.bar = (3, 4)  # rebinding that our dict-tag optimisation won't see
                 #
                 # Therefore only tuples from __closure__ / __defaults__ participate in the
                 # recursive-dict-tag optimization; all others are ignored.
@@ -3870,13 +3870,13 @@ class CheckFunctionManager:
             )

             # Note - On Lambda guarding of object aliasing
-            # We previously installed object‑aliasing guards as relational guards,
-            # but that undermined the recursive‑dict guard optimization: placing the
+            # We previously installed object-aliasing guards as relational guards,
+            # but that undermined the recursive-dict guard optimization: placing the
             # aliasing guard at a leaf prevented the parent dict node from
-            # qualifying as a recursive‑dict guard root. Because aliasing guards are
+            # qualifying as a recursive-dict guard root. Because aliasing guards are
             # rare, we now emit them as epilogue guards via a small Python lambda.
             # This repeats the access in Python—adding a bit of work—but the
-            # overhead is outweighed by the gains from enabling recursive‑dict guard
+            # overhead is outweighed by the gains from enabling recursive-dict guard
             # optimization.
             if (
                 config.use_lamba_guard_for_object_aliasing
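The tag-safe-root comments above are the crux of the recursive-dict-tag optimization: re-validate a guard subtree only when the dictionary tag at its root has changed. Below is a minimal sketch of that pruning rule, assuming a toy version counter (`bump`/`version_of`) in place of CPython's C-level dict version tag (PEP 509); the real logic lives in the C++ GuardManager and differs in detail.

```python
# Toy stand-in for CPython's per-dict version tag (PEP 509); the real tag is
# maintained at the C level and read by the C++ guard evaluator.
_versions: dict[int, int] = {}

def bump(d: dict) -> None:
    _versions[id(d)] = _versions.get(id(d), 0) + 1

def version_of(d: dict) -> int:
    return _versions.get(id(d), 0)

class GuardNode:
    def __init__(self, guarded: dict, children: list["GuardNode"] | None = None):
        self.guarded = guarded
        self.children = children or []
        self.cached_tag: int | None = None  # tag recorded when guards last passed

    def check(self) -> bool:
        tag = version_of(self.guarded)
        if tag == self.cached_tag:
            return True  # tag match: prune traversal of the entire subtree
        if not all(child.check() for child in self.children):
            return False
        # Caching is sound only for tag-safe nodes, where an unchanged tag
        # implies the whole guarded subtree is unchanged.
        self.cached_tag = tag
        return True

inner, outer = {"w": 1}, {}
root = GuardNode(outer, [GuardNode(inner)])
assert root.check()  # first run walks the subtree and caches tags
assert root.check()  # second run prunes at the root: tag unchanged
bump(outer)          # simulate a mutation of the guarded dict
assert root.check()  # tag mismatch: re-validate children, then re-cache
```

This also illustrates why the doc insists on the "recursively unchanged" requirement: pruning on a root tag match is only sound when that match implies the children are unchanged too.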
diff --git a/torch/_dynamo/variables/functions.py b/torch/_dynamo/variables/functions.py
index d1755c85abf6..d0866229efbe 100644
--- a/torch/_dynamo/variables/functions.py
+++ b/torch/_dynamo/variables/functions.py
@@ -104,7 +104,7 @@
 CO_VARARGS = 0x04
 CO_VARKEYWORDS = 0x08

-# Module‐level cache keyed by the function object
+# Module-level cache keyed by the function object
 _spec_cache = WeakKeyDictionary()
@@ -133,7 +133,7 @@ class FunctionSpec:
         self.defaults = func.__defaults__ or ()
         self.kwdefaults = func.__kwdefaults__ or {}

-        # Map positional‐default names → their index in self.defaults
+        # Map positional-default names → their index in self.defaults
         self.pos_default_map = dict(
             zip(self.all_pos_names[-len(self.defaults) :], range(len(self.defaults)))
         )
@@ -879,7 +879,7 @@ class LocalGeneratorObjectVariable(VariableTracker):
         retval = self.next_variable(tx)

         # The exception raised before is still active. We need to check the exception
-        # table one more time to find the next target. But why? Let’s walk
+        # table one more time to find the next target. But why? Let's walk
         # through an example and its generated bytecode: https://godbolt.org/z/ebdTbMv8M
         #
         # z = 0
@@ -1075,7 +1075,7 @@ class UserMethodVariable(UserFunctionVariable):
         # One way is to simplly use `__func__` to unwrap it.
         #
         # For recursive dict-tag optimizations, it can be faster to fetch the
-        # function directly from `cls.__dict__`; that’s why we pass on
+        # function directly from `cls.__dict__`; that's why we pass on
         # `source_fn`. Whenever it is possible to access the function from
         # cls.__dict__, we pass that on to `source_fn`. Because bind_args
         # operates on the unbound function, most guards should target
diff --git a/torch/_functorch/_activation_checkpointing/knapsack.py b/torch/_functorch/_activation_checkpointing/knapsack.py
index 67187c92eb7d..0a3eaa5a9344 100644
--- a/torch/_functorch/_activation_checkpointing/knapsack.py
+++ b/torch/_functorch/_activation_checkpointing/knapsack.py
@@ -69,12 +69,12 @@ def dp_knapsack(

     # Quantize the memory weights
     quantized_memory = torch.tensor(
-        [int(round(m * S)) for m in memory], dtype=torch.long, device="cpu"
+        [round(m * S) for m in memory], dtype=torch.long, device="cpu"
     )
     runtimes = torch.tensor(runtime, dtype=torch.float32, device="cpu")

     # Quantized pseudopolynomial DP for 0-1 Knapsack
-    quantized_max_memory = int(round(max_memory * S))
+    quantized_max_memory = round(max_memory * S)

     n = len(memory)
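The knapsack change above (and several like it below) relies on a Python 3 guarantee: `round()` with no `ndigits` already returns an `int` for float input, so the `int(...)` wrapper was a no-op. A quick check of the semantics, including the ties-to-even rule that `round()` uses:

```python
# In Python 3, round(x) with no ndigits returns an int, so int(round(x))
# only repeats work the call already did.
assert isinstance(round(2.6), int)
assert round(2.6) == int(round(2.6)) == 3

# One subtlety worth remembering: round() uses banker's rounding
# (ties go to the nearest even integer), not "round half up".
assert round(2.5) == 2
assert round(3.5) == 4
```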
diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py
index 102515e74b5e..3cd461171187 100644
--- a/torch/_inductor/codegen/cpp.py
+++ b/torch/_inductor/codegen/cpp.py
@@ -4197,8 +4197,6 @@ class CppKernelProxy(CppKernel):
                         to_type_node, lambda n: n is not to_type_node
                     )
                     metrics.cpp_to_dtype_count += 1
-                else:
-                    pass

         def eliminate_to_dtype(sub_graph: torch.fx.Graph):
             def _eliminate_duplicate_to_node(sub_graph: torch.fx.Graph):
diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py
index 987b54d721a5..94450a60de21 100644
--- a/torch/_inductor/cpp_builder.py
+++ b/torch/_inductor/cpp_builder.py
@@ -813,8 +813,6 @@ def _get_os_related_cpp_definitions(cpp_compiler: str) -> list[str]:
         # On Windows, we need disable min/max macro to avoid C2589 error, as PyTorch CMake:
         # https://github.com/pytorch/pytorch/blob/9a41570199155eee92ebd28452a556075e34e1b4/CMakeLists.txt#L1118-L1119
         os_definitions.append("NOMINMAX")
-    else:
-        pass

     return os_definitions
diff --git a/torch/_inductor/mkldnn_lowerings.py b/torch/_inductor/mkldnn_lowerings.py
index 3b3a7b072534..b39092772903 100644
--- a/torch/_inductor/mkldnn_lowerings.py
+++ b/torch/_inductor/mkldnn_lowerings.py
@@ -1348,5 +1348,3 @@ def register_onednn_fusion_ops():
                 return result

         add_needs_realized_inputs(cpu_needs_realized_inputs)
-    else:
-        pass
diff --git a/torch/_subclasses/fake_impls.py b/torch/_subclasses/fake_impls.py
index 36ec6b13b7e4..0cf89a9e1b07 100644
--- a/torch/_subclasses/fake_impls.py
+++ b/torch/_subclasses/fake_impls.py
@@ -157,8 +157,7 @@ def _is_op_registered_to_fake_rule(op):


 def _deregister_op_impl(op):
-    if op in op_implementations_dict:
-        del op_implementations_dict[op]
+    op_implementations_dict.pop(op, None)
     for check, impl in op_implementations_checks:
         if check is op:
             op_implementations_checks.remove((check, impl))
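The `_deregister_op_impl` rewrite swaps the two-step membership test plus `del` for `dict.pop(key, default)`, which does the same thing in a single lookup and never raises for a missing key. A behavior check:

```python
registry = {"op_a": "impl_a"}

# Equivalent to: if "op_a" in registry: del registry["op_a"]
registry.pop("op_a", None)   # key present: entry removed
registry.pop("op_a", None)   # key absent: returns the default, no KeyError
assert registry == {}
```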
diff --git a/torch/_tensor_str.py b/torch/_tensor_str.py
index c9262e1b2ee0..a867ec79093d 100644
--- a/torch/_tensor_str.py
+++ b/torch/_tensor_str.py
@@ -247,7 +247,7 @@ def _vector_str(self, indent, summarize, formatter1, formatter2=None):
         element_length += formatter2.width() + 1

     elements_per_line = max(
-        1, int(math.floor((PRINT_OPTS.linewidth - indent) / (element_length)))
+        1, math.floor((PRINT_OPTS.linewidth - indent) / (element_length))
     )

     def _val_formatter(val, formatter1=formatter1, formatter2=formatter2):
diff --git a/torch/_utils_internal.py b/torch/_utils_internal.py
index 839c50d12d56..8602ac955f10 100644
--- a/torch/_utils_internal.py
+++ b/torch/_utils_internal.py
@@ -305,7 +305,7 @@ def deprecated():
     """

     def decorator(func: Callable[_P, _T]) -> Callable[_P, _T]:
-        # Validate naming convention – single leading underscore, not dunder
+        # Validate naming convention - single leading underscore, not dunder
         if not (func.__name__.startswith("_")):
             raise ValueError(
                 "@deprecate must decorate a function whose name "
diff --git a/torch/ao/nn/quantized/reference/modules/utils.py b/torch/ao/nn/quantized/reference/modules/utils.py
index 0701b73da38b..653e688c4d17 100644
--- a/torch/ao/nn/quantized/reference/modules/utils.py
+++ b/torch/ao/nn/quantized/reference/modules/utils.py
@@ -198,7 +198,7 @@ def _quantize_weight_decomposed(
     _DTYPE_TO_QVALUE_BOUNDS: dict[torch.dtype, tuple[int, int]] = {
         torch.uint8: (0, 255),
         torch.int8: (-128, 127),
-        torch.int32: (int(-(2**31)), int(2**31 - 1)),
+        torch.int32: (-(2**31), 2**31 - 1),
     }
     # TODO: add an util function for converting qdtype to dtype
@@ -261,7 +261,7 @@ def _dequantize_weight_decomposed(
     _DTYPE_TO_QVALUE_BOUNDS: dict[torch.dtype, tuple[int, int]] = {
         torch.uint8: (0, 255),
         torch.int8: (-128, 127),
-        torch.int32: (int(-(2**31)), int(2**31 - 1)),
+        torch.int32: (-(2**31), 2**31 - 1),
     }
     # TODO: add an util function for converting qdtype to dtype
     _QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE = {
diff --git a/torch/ao/ns/fx/graph_passes.py b/torch/ao/ns/fx/graph_passes.py
index bc30a014c195..59e283528d60 100644
--- a/torch/ao/ns/fx/graph_passes.py
+++ b/torch/ao/ns/fx/graph_passes.py
@@ -164,8 +164,6 @@ def add_loggers_to_model(
                         index_of_arg=node_arg_idx,
                         fqn=fqn,
                     )
-                else:
-                    pass

             # ensure env is populated with base node
             # Note: runs for both inputs and outputs
diff --git a/torch/ao/pruning/sparsifier/weight_norm_sparsifier.py b/torch/ao/pruning/sparsifier/weight_norm_sparsifier.py
index 89c707ad33e6..c9577cbb79a3 100644
--- a/torch/ao/pruning/sparsifier/weight_norm_sparsifier.py
+++ b/torch/ao/pruning/sparsifier/weight_norm_sparsifier.py
@@ -142,7 +142,7 @@ class WeightNormSparsifier(BaseSparsifier):
             data = data.repeat(1, values_per_block, 1)

-        threshold_idx = int(round(sparsity_level * num_blocks))
+        threshold_idx = round(sparsity_level * num_blocks)
         threshold_idx = max(0, min(num_blocks - 1, threshold_idx))  # Sanity check
         _, sorted_idx = torch.topk(data, k=threshold_idx, dim=2, largest=False)
diff --git a/torch/distributed/elastic/utils/data/elastic_distributed_sampler.py b/torch/distributed/elastic/utils/data/elastic_distributed_sampler.py
index d95c2b0256fe..b905049e9286 100644
--- a/torch/distributed/elastic/utils/data/elastic_distributed_sampler.py
+++ b/torch/distributed/elastic/utils/data/elastic_distributed_sampler.py
@@ -62,8 +62,8 @@ class ElasticDistributedSampler(DistributedSampler[T]):
         self.start_index = start_index
         sized_dataset = cast(Sized, self.dataset)
-        self.num_samples = int(
-            math.ceil(float(len(sized_dataset) - self.start_index) / self.num_replicas)
+        self.num_samples = math.ceil(
+            float(len(sized_dataset) - self.start_index) / self.num_replicas
         )
         self.total_size = self.num_samples * self.num_replicas
diff --git a/torch/distributed/fsdp/_init_utils.py b/torch/distributed/fsdp/_init_utils.py
index 90c6e67f1bd4..2c604baee87a 100644
--- a/torch/distributed/fsdp/_init_utils.py
+++ b/torch/distributed/fsdp/_init_utils.py
@@ -56,7 +56,7 @@ try:
 except ImportError:
     _TORCHDISTX_AVAIL = False

-PARAM_BROADCAST_BUCKET_SIZE = int(250 * 1024 * 1024)
+PARAM_BROADCAST_BUCKET_SIZE = 250 * 1024 * 1024
 FSDP_SYNCED = "_fsdp_synced"
 # Specification of process groups for hybrid sharding strategies.
 HybridShardProcessGroupType = tuple[dist.ProcessGroup, dist.ProcessGroup]
diff --git a/torch/fx/experimental/graph_gradual_typechecker.py b/torch/fx/experimental/graph_gradual_typechecker.py
index a8798a6a0726..759d54cb8d37 100644
--- a/torch/fx/experimental/graph_gradual_typechecker.py
+++ b/torch/fx/experimental/graph_gradual_typechecker.py
@@ -942,15 +942,11 @@ class Refine:
         if n.op == "call_function":
             if n.target in _REFINEMENT_RULES:
                 self.constraints += _REFINEMENT_RULES[n.target](n)
-            else:
-                pass

         if n.op == "call_module":
             module_instance = self.traced.get_submodule(n.target)
             if type(module_instance) in _REFINEMENT_RULES:
                 self.constraints += _REFINEMENT_RULES[type(module_instance)](n)
-            else:
-                pass

         if n.op == "output":
@@ -960,23 +956,16 @@ class Refine:
             n.type = torch.fx.node.map_arg(n.args[0], get_node_type)
             return n.type
-        else:
-            pass
-
     def infer_symbolic_relations(self, n: Node):
         n.type = self.convert_to_sympy_symbols(n.type)
         if n.op == "call_function":
             if n.target in _RULES:
                 return _RULES[n.target](n)
-            else:
-                pass

         if n.op == "call_module":
             module_instance = self.traced.get_submodule(n.target)
             if type(module_instance) in _RULES:
                 return _RULES[type(module_instance)](n, module_instance)
-            else:
-                pass

         if n.op == "output":
@@ -986,9 +975,6 @@ class Refine:
             n.type = torch.fx.node.map_arg(n.args[0], get_node_type)
             return n.type
-        else:
-            pass
-

 def get_parameter(traced, target: str):
     """
diff --git a/torch/nn/functional.py b/torch/nn/functional.py
index a137ebca2c72..eb6c801e97cc 100644
--- a/torch/nn/functional.py
+++ b/torch/nn/functional.py
@@ -4718,7 +4718,7 @@ def interpolate(  # noqa: F811
         ]
     elif torch.jit.is_scripting():
         output_size = [
-            int(math.floor(float(input.size(i + 2)) * scale_factors[i]))
+            math.floor(float(input.size(i + 2)) * scale_factors[i])
             for i in range(dim)
         ]
     else:
diff --git a/torch/nn/init.py b/torch/nn/init.py
index 8dc932fa8035..83183d8db5f4 100644
--- a/torch/nn/init.py
+++ b/torch/nn/init.py
@@ -705,7 +705,7 @@ def sparse_(
         raise ValueError("Only tensors with 2 dimensions are supported")

     rows, cols = tensor.shape
-    num_zeros = int(math.ceil(sparsity * rows))
+    num_zeros = math.ceil(sparsity * rows)

     with torch.no_grad():
         tensor.normal_(0, std, generator=generator)
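Several of these hunks (tensor printing, the elastic sampler, `interpolate`, `sparse_`, and `random_split` further down) drop an `int()` around `math.floor` or `math.ceil`. That is safe for the same reason as the `round()` change earlier: since Python 3, both functions return `int` for float inputs. For instance, the sampler's per-replica count:

```python
import math

# math.floor / math.ceil return int in Python 3, so int(...) adds nothing.
assert isinstance(math.floor(3.9), int)
assert isinstance(math.ceil(3.1), int)

# e.g. ElasticDistributedSampler: the per-replica sample count for 10
# remaining items across 4 replicas is ceil(10 / 4) = 3.
assert math.ceil(float(10) / 4) == 3
```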
diff --git a/torch/nn/parallel/distributed.py b/torch/nn/parallel/distributed.py
index 11389251ea2f..040d49e17dcc 100644
--- a/torch/nn/parallel/distributed.py
+++ b/torch/nn/parallel/distributed.py
@@ -819,7 +819,7 @@ class DistributedDataParallel(Module, Joinable):
                 "Run a dummy forward pass to correctly initialize the modules",
             )
         # used for intra-node param sync and inter-node sync as well
-        self.broadcast_bucket_size = int(250 * 1024 * 1024)
+        self.broadcast_bucket_size = 250 * 1024 * 1024

         # reduction bucket size
         if bucket_cap_mb is None:
diff --git a/torch/onnx/_internal/exporter/_core.py b/torch/onnx/_internal/exporter/_core.py
index 0cf27560784f..cdd72447ddb5 100644
--- a/torch/onnx/_internal/exporter/_core.py
+++ b/torch/onnx/_internal/exporter/_core.py
@@ -270,8 +270,6 @@ def _set_shape_type(
     elif isinstance(meta_val, (float, torch.SymFloat)):
         value.dtype = ir.DataType.FLOAT
         value.shape = ir.Shape([])
-    else:
-        pass


 def _get_qualified_module_name(cls: Any) -> str:
diff --git a/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py b/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py
index 596c656777f8..a0b79bd619b1 100644
--- a/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py
+++ b/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py
@@ -1365,8 +1365,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
             "get_pool_ceil_padding", "input size not accessible", input
         )
     ceiled_output_dim = [
-        int(math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])))
-        + 1
+        math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])) + 1
         for i in range(0, len(padding))
     ]
     # ensure last pooling starts inside
@@ -4536,7 +4535,7 @@ def lstm_cell(g: jit_utils.GraphContext, self, hidden, w_ih, w_hh, b_ih, b_hh):
     weight = (
         (w_ih, w_hh, b_ih, b_hh) if symbolic_helper._is_tensor(b_ih) else (w_ih, w_hh)
     )
-    has_biases = True if symbolic_helper._is_tensor(b_ih) else False
+    has_biases = bool(symbolic_helper._is_tensor(b_ih))
     _, h_outs, c_outs = _generic_rnn(
         g,
         "LSTM",
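The `has_biases` change is the standard simplification of the `True if x else False` pattern: `bool(x)` performs exactly the same truthiness coercion, just without spelling out both branches. A quick equivalence check:

```python
# bool(x) and `True if x else False` agree for every value of x.
for x in (None, 0, 1, "", "bias", [], [0]):
    assert bool(x) is (True if x else False)
```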
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index 8fdeaaec3f52..d36af7928215 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -9710,8 +9710,7 @@ class foreach_pointwise_sample_func(foreach_inputs_sample_func):
             sample_inputs_foreach(None, device, dtype, NUM_SIZE0_TENSORS, zero_size=True, **_foreach_inputs_kwargs)
             for _ in range(2)
         ]
-        if "scalars" in kwargs:
-            del kwargs["scalars"]
+        kwargs.pop("scalars", None)
         kwargs.update(self._sample_kwargs(opinfo, args[-1], ForeachRightmostArgType.TensorList, dtype))
         yield ForeachSampleInput(input, *args, **kwargs)
diff --git a/torch/testing/_internal/common_optimizers.py b/torch/testing/_internal/common_optimizers.py
index 7ed5d8a247f0..817b11b3a7a3 100644
--- a/torch/testing/_internal/common_optimizers.py
+++ b/torch/testing/_internal/common_optimizers.py
@@ -1955,7 +1955,7 @@ optim_db: list[OptimizerInfo] = [
         supports_complex=False,
         skips=(
             # Note on numerical differences: `compile` applies different matmul tuning,
-            # which leads to deviations compared to eager mode. In the Newton–Schulz
+            # which leads to deviations compared to eager mode. In the Newton-Schulz
             # iteration for orthogonalization, computations are done in bfloat16, further
             # amplifying these numerical differences.
             DecorateInfo(
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
index a8c7952601ff..c4701432d81d 100644
--- a/torch/testing/_internal/distributed/distributed_test.py
+++ b/torch/testing/_internal/distributed/distributed_test.py
@@ -858,8 +858,6 @@ class DistributedTest:
                 with exception_ctx:
                     dist.barrier(group_id)
                 self.assertGreaterAlmostEqual(time.time(), expected_time, delta=0.1)
-            else:
-                pass

         @skip_but_pass_in_sandcastle_if(
             BACKEND != "gloo", "Only gloo backend supports timeouts"
         )
diff --git a/torch/utils/data/dataset.py b/torch/utils/data/dataset.py
index a5c6ccaae900..c3db9b892cdb 100644
--- a/torch/utils/data/dataset.py
+++ b/torch/utils/data/dataset.py
@@ -454,9 +454,7 @@ def random_split(
         for i, frac in enumerate(lengths):
             if frac < 0 or frac > 1:
                 raise ValueError(f"Fraction at index {i} is not between 0 and 1")
-            n_items_in_split = int(
-                math.floor(len(dataset) * frac)  # type: ignore[arg-type]
-            )
+            n_items_in_split = math.floor(len(dataset) * frac)  # type: ignore[arg-type]
             subset_lengths.append(n_items_in_split)
         remainder = len(dataset) - sum(subset_lengths)  # type: ignore[arg-type]
         # add 1 to all the lengths in round-robin fashion until the remainder is 0
diff --git a/torch/utils/tensorboard/_embedding.py b/torch/utils/tensorboard/_embedding.py
index 44cb6c41b017..f3ee9ef36095 100644
--- a/torch/utils/tensorboard/_embedding.py
+++ b/torch/utils/tensorboard/_embedding.py
@@ -42,7 +42,7 @@ def make_sprite(label_img, save_path):

     # this ensures the sprite image has correct dimension as described in
     # https://www.tensorflow.org/get_started/embedding_viz
-    nrow = int(math.ceil((label_img.size(0)) ** 0.5))
+    nrow = math.ceil((label_img.size(0)) ** 0.5)
     arranged_img_CHW = make_grid(make_np(label_img), ncols=nrow)

     # augment images so that #images equals nrow*nrow
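`make_sprite` arranges N thumbnails on the smallest square grid that holds them, so the side length is `ceil(sqrt(N))` and the image list is then padded until it fills `nrow * nrow` cells. The arithmetic, with `grid_side` as a hypothetical helper name for illustration:

```python
import math

# Smallest square grid that can hold num_images thumbnails.
def grid_side(num_images: int) -> int:
    return math.ceil(num_images ** 0.5)

assert grid_side(9) == 3    # a perfect square needs no padding
assert grid_side(10) == 4   # 10 images get a 4x4 grid plus 6 padding images
```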