From fdab48a7c1c4f0f7416c3517cab7f353619a5091 Mon Sep 17 00:00:00 2001
From: Yuanyuan Chen
Date: Sat, 18 Oct 2025 07:36:18 +0000
Subject: [PATCH] Enable all PIE rules on ruff (#165814)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR enables all PIE rules on ruff. Some rules from this family were
already enabled; the newly added rules are:

```
PIE796 Enum contains duplicate value: {value}
PIE808 Unnecessary start argument in range
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165814
Approved by: https://github.com/ezyang
---
 benchmarks/gpt_fast/mixtral_moe_quantize.py | 2 +-
 caffe2/perfkernels/hp_emblookup_codegen.py | 8 ++--
 pyproject.toml | 7 +---
 .../ao/sparsity/test_activation_sparsifier.py | 4 +-
 test/ao/sparsity/test_data_scheduler.py | 2 +-
 test/ao/sparsity/test_data_sparsifier.py | 2 +-
 test/ao/sparsity/test_sparsifier.py | 4 +-
 .../quantization/test_quantization.py | 12 +++---
 test/distributed/checkpoint/test_planner.py | 2 +-
 test/distributed/checkpoint/test_utils.py | 2 +-
 .../elastic/agent/server/test/api_test.py | 2 +-
 .../elastic/multiprocessing/api_test.py | 2 +-
 .../timer/file_based_local_timer_test.py | 2 +-
 .../elastic/timer/local_timer_example.py | 4 +-
 .../elastic/timer/local_timer_test.py | 2 +-
 .../utils/data/cycling_iterator_test.py | 4 +-
 .../fsdp/test_fsdp_hybrid_shard.py | 4 +-
 test/distributed/tensor/test_dtensor_ops.py | 4 +-
 test/distributed/test_device_mesh.py | 2 +-
 test/distributions/test_distributions.py | 34 ++++++++---------
 test/dynamo/test_export.py | 8 ++--
 test/dynamo/test_functions.py | 2 +-
 test/dynamo/test_modules.py | 2 +-
 test/dynamo/test_repros.py | 6 +--
 test/functorch/test_ac.py | 4 +-
 test/inductor/test_codecache.py | 2 +-
 test/inductor/test_compiled_autograd.py | 2 +-
 test/inductor/test_max_autotune.py | 2 +-
 test/inductor/test_triton_kernels.py | 4 +-
 test/jit/xnnpack/test_xnnpack_delegate.py | 2 +-
 test/nn/test_convolution.py | 2 +-
 test/nn/test_embedding.py | 2 +-
 test/nn/test_multihead_attention.py | 2 +-
 test/nn/test_pooling.py | 2 +-
 test/onnx/test_onnx_opset.py | 4 +-
 test/optim/test_lrscheduler.py | 2 +-
 test/profiler/test_profiler.py | 6 +--
 .../core/experimental/test_floatx.py | 2 +-
 test/test_dataloader.py | 2 +-
 test/test_datapipe.py | 6 +--
 test/test_dynamic_shapes.py | 4 +-
 test/test_indexing.py | 2 +-
 test/test_jit.py | 8 ++--
 test/test_jit_fuser_te.py | 8 ++--
 test/test_matmul_cuda.py | 2 +-
 test/test_mps.py | 14 +++----
 test/test_numa_binding.py | 6 +--
 test/test_reductions.py | 4 +-
 test/test_serialization.py | 2 +-
 test/test_sparse.py | 2 +-
 test/test_sparse_csr.py | 2 +-
 test/test_static_runtime.py | 2 +-
 test/test_tensorboard.py | 2 +-
 test/test_tensorexpr.py | 2 +-
 test/test_torch.py | 2 +-
 test/test_view_ops.py | 2 +-
 test/test_xnnpack_integration.py | 4 +-
 torch/_decomp/decompositions_for_jvp.py | 2 +-
 torch/_dynamo/eval_frame.py | 4 +-
 torch/_inductor/dependencies.py | 2 +-
 torch/_meta_registrations.py | 2 +-
 torch/_numpy/_funcs_impl.py | 2 +-
 torch/_refs/__init__.py | 2 +-
 torch/_tensor_str.py | 6 +--
 torch/ao/ns/fx/pattern_utils.py | 2 +-
 .../activation_sparsifier.py | 6 +--
 .../benchmarks/evaluate_disk_savings.py | 2 +-
 .../lightning/tests/test_callbacks.py | 2 +-
 .../sparsifier/nearly_diagonal_sparsifier.py | 2 +-
 .../ao/quantization/experimental/observer.py | 4 +-
 torch/ao/quantization/fx/_decomposed.py | 2 +-
 torch/autograd/profiler.py | 2 +-
 torch/distributed/_pycute/layout.py | 16 ++++----
 .../distributed/_symmetric_memory/__init__.py | 6
+-- .../elastic/multiprocessing/api.py | 2 +- .../distributed/elastic/timer/local_timer.py | 2 +- torch/distributed/tensor/_dtensor_spec.py | 2 +- torch/distributed/tensor/parallel/fsdp.py | 2 +- torch/nested/_internal/ops.py | 2 +- .../torchscript_exporter/symbolic_helper.py | 2 +- .../torchscript_exporter/symbolic_opset12.py | 2 +- .../torchscript_exporter/symbolic_opset8.py | 2 +- .../torchscript_exporter/symbolic_opset9.py | 18 ++++----- .../_internal/common_methods_invocations.py | 4 +- torch/testing/_internal/common_nn.py | 10 ++--- .../distributed/_tensor/common_dtensor.py | 2 +- .../_internal/distributed/distributed_test.py | 38 +++++++++---------- .../distributed/multi_threaded_pg.py | 2 +- .../distributed/rpc/dist_autograd_test.py | 6 +-- .../_internal/distributed/rpc/rpc_test.py | 4 +- torch/testing/_internal/jit_utils.py | 2 +- torch/testing/_internal/triton_utils.py | 2 +- 92 files changed, 200 insertions(+), 205 deletions(-) diff --git a/benchmarks/gpt_fast/mixtral_moe_quantize.py b/benchmarks/gpt_fast/mixtral_moe_quantize.py index 50ffd61bdb83..fd0342ce3d59 100644 --- a/benchmarks/gpt_fast/mixtral_moe_quantize.py +++ b/benchmarks/gpt_fast/mixtral_moe_quantize.py @@ -85,7 +85,7 @@ class WeightOnlyInt8QuantHandler: cur_state_dict[f"{fqn}.weight"] = int8_weight cur_state_dict[f"{fqn}.scales"] = scales.to(mod.weight.dtype) elif isinstance(mod, ConditionalFeedForward): - for weight_idx in range(0, 3): + for weight_idx in range(3): weight_name = f"w{weight_idx + 1}" scales_name = f"scales{weight_idx + 1}" weight = getattr(mod, weight_name) diff --git a/caffe2/perfkernels/hp_emblookup_codegen.py b/caffe2/perfkernels/hp_emblookup_codegen.py index 91f6ac238c0f..43254cddf26e 100644 --- a/caffe2/perfkernels/hp_emblookup_codegen.py +++ b/caffe2/perfkernels/hp_emblookup_codegen.py @@ -74,7 +74,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets) ) code.append(" " + OutType + "* op = &out[rangeIndex * block_size];") - for i in range(0, uf): + for i in range(uf): j = 8 * i code.append(" __m256 vop" + str(j) + " = _mm256_setzero_ps();") @@ -158,7 +158,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets) "&input[idx_pref_T0 * fused_block_size];" ) - for i in range(0, uf): + for i in range(uf): j = 8 * i cachelinesize = 64 byteoffset = sizeof[InType] * j @@ -170,7 +170,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets) code.append(" if (!normalize_by_lengths || length == 0) {") else: code.append(" if (!normalize_by_lengths || lengths[rangeIndex] == 0) {") - for i in range(0, uf): + for i in range(uf): j = 8 * i code.append(" _mm256_storeu_ps(&op[" + str(j) + "], vop" + str(j) + ");") code.append(" } else {") @@ -181,7 +181,7 @@ def unroll(uf, IndexType, InType, OutType, use_weights, isa, fused, use_offsets) code.append( " __m256 vlen_inv = _mm256_set1_ps(1.0f / lengths[rangeIndex]);" ) - for i in range(0, uf): + for i in range(uf): j = 8 * i code.append( " _mm256_storeu_ps(&op[" diff --git a/pyproject.toml b/pyproject.toml index e42f08d296f3..f18368b90d8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,12 +204,7 @@ select = [ "NPY", "PERF", "PGH004", - "PIE790", - "PIE794", - "PIE800", - "PIE804", - "PIE807", - "PIE810", + "PIE", "PLC0131", # type bivariance "PLC0132", # type param mismatch "PLC1802", # len({expression}) used as condition without comparison diff --git a/test/ao/sparsity/test_activation_sparsifier.py b/test/ao/sparsity/test_activation_sparsifier.py index 
0f3f36ecda9f..079f5e1941d2 100644 --- a/test/ao/sparsity/test_activation_sparsifier.py +++ b/test/ao/sparsity/test_activation_sparsifier.py @@ -190,7 +190,7 @@ class TestActivationSparsifier(TestCase): if features is None: assert torch.all(mask * input_data == output) else: - for feature_idx in range(0, len(features)): + for feature_idx in range(len(features)): feature = torch.Tensor( [features[feature_idx]], device=input_data.device ).long() @@ -378,7 +378,7 @@ class TestActivationSparsifier(TestCase): # some dummy data data_list = [] num_data_points = 5 - for _ in range(0, num_data_points): + for _ in range(num_data_points): rand_data = torch.randn(16, 1, 28, 28) activation_sparsifier.model(rand_data) data_list.append(rand_data) diff --git a/test/ao/sparsity/test_data_scheduler.py b/test/ao/sparsity/test_data_scheduler.py index de0a885f0153..47a85e1edda1 100644 --- a/test/ao/sparsity/test_data_scheduler.py +++ b/test/ao/sparsity/test_data_scheduler.py @@ -143,7 +143,7 @@ class TestBaseDataScheduler(TestCase): # checking step count step_cnt = 5 - for _ in range(0, step_cnt): + for _ in range(step_cnt): sparsifier.step() scheduler.step() diff --git a/test/ao/sparsity/test_data_sparsifier.py b/test/ao/sparsity/test_data_sparsifier.py index dce04292763f..fa08e8c90ac2 100644 --- a/test/ao/sparsity/test_data_sparsifier.py +++ b/test/ao/sparsity/test_data_sparsifier.py @@ -123,7 +123,7 @@ class _BaseDataSparsiferTestCase(TestCase): step_count = 3 - for _ in range(0, step_count): + for _ in range(step_count): sparsifier.step() for some_data in all_data: name, data, _ = self._get_name_data_config(some_data) diff --git a/test/ao/sparsity/test_sparsifier.py b/test/ao/sparsity/test_sparsifier.py index d5010b7abccd..a940a3e9feba 100644 --- a/test/ao/sparsity/test_sparsifier.py +++ b/test/ao/sparsity/test_sparsifier.py @@ -472,8 +472,8 @@ class TestNearlyDiagonalSparsifier(TestCase): else: height, width = mask.shape dist_to_diagonal = nearliness // 2 - for row in range(0, height): - for col in range(0, width): + for row in range(height): + for col in range(width): if abs(row - col) <= dist_to_diagonal: assert mask[row, col] == 1 else: diff --git a/test/distributed/algorithms/quantization/test_quantization.py b/test/distributed/algorithms/quantization/test_quantization.py index b65e0a747405..6044eac70b51 100644 --- a/test/distributed/algorithms/quantization/test_quantization.py +++ b/test/distributed/algorithms/quantization/test_quantization.py @@ -79,7 +79,7 @@ if BACKEND == "gloo" or BACKEND == "nccl": dist.init_process_group( store=store, rank=self.rank, world_size=self.world_size, backend="gloo" ) - group = list(range(0, self.world_size)) + group = list(range(self.world_size)) group_id = dist.group.WORLD self._test_all_gather( group, group_id, self.rank, dtype=torch.float32, qtype=DQuantType.FP16 @@ -94,7 +94,7 @@ if BACKEND == "gloo" or BACKEND == "nccl": dist.init_process_group( store=store, rank=self.rank, world_size=self.world_size, backend="gloo" ) - group = list(range(0, self.world_size)) + group = list(range(self.world_size)) group_id = dist.group.WORLD self._test_all_gather( group, group_id, self.rank, dtype=torch.float32, qtype=DQuantType.BFP16 @@ -111,7 +111,7 @@ if BACKEND == "gloo" or BACKEND == "nccl": dist.init_process_group( store=store, rank=self.rank, world_size=self.world_size, backend="nccl" ) - group = list(range(0, self.world_size)) + group = list(range(self.world_size)) group_id = dist.new_group(range(self.world_size)) rank_to_GPU = init_multigpu_helper(self.world_size, 
BACKEND) self._test_all_to_all( @@ -135,7 +135,7 @@ if BACKEND == "gloo" or BACKEND == "nccl": dist.init_process_group( store=store, rank=self.rank, world_size=self.world_size, backend="nccl" ) - group = list(range(0, self.world_size)) + group = list(range(self.world_size)) group_id = dist.new_group(range(self.world_size)) rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND) self._test_all_to_all( @@ -158,7 +158,7 @@ if BACKEND == "gloo" or BACKEND == "nccl": dist.init_process_group( store=store, rank=self.rank, world_size=self.world_size, backend="nccl" ) - group = list(range(0, self.world_size)) + group = list(range(self.world_size)) group_id = dist.new_group(range(self.world_size)) rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND) self._test_all_to_all_single( @@ -181,7 +181,7 @@ if BACKEND == "gloo" or BACKEND == "nccl": dist.init_process_group( store=store, rank=self.rank, world_size=self.world_size, backend="nccl" ) - group = list(range(0, self.world_size)) + group = list(range(self.world_size)) group_id = dist.new_group(range(self.world_size)) rank_to_GPU = init_multigpu_helper(self.world_size, BACKEND) self._test_all_to_all_single( diff --git a/test/distributed/checkpoint/test_planner.py b/test/distributed/checkpoint/test_planner.py index edf043301ed2..86bed29de998 100644 --- a/test/distributed/checkpoint/test_planner.py +++ b/test/distributed/checkpoint/test_planner.py @@ -66,7 +66,7 @@ if TEST_WITH_DEV_DBG_ASAN: def create_sharded_tensor(rank, world_size, shards_per_rank, shard_size=8): shards_metadata = [] local_shards = [] - for idx in range(0, world_size * shards_per_rank): + for idx in range(world_size * shards_per_rank): shard_rank = idx // shards_per_rank shard_md = ShardMetadata( shard_offsets=[idx * shard_size], diff --git a/test/distributed/checkpoint/test_utils.py b/test/distributed/checkpoint/test_utils.py index 722670c95f18..79dbe741822c 100644 --- a/test/distributed/checkpoint/test_utils.py +++ b/test/distributed/checkpoint/test_utils.py @@ -45,7 +45,7 @@ if TEST_WITH_DEV_DBG_ASAN: def create_sharded_tensor(rank, world_size, shards_per_rank): shards_metadata = [] local_shards = [] - for idx in range(0, world_size * shards_per_rank): + for idx in range(world_size * shards_per_rank): shard_rank = idx // shards_per_rank shard_md = ShardMetadata( shard_offsets=[idx * 8], shard_sizes=[8], placement=f"rank:{shard_rank}/cpu" diff --git a/test/distributed/elastic/agent/server/test/api_test.py b/test/distributed/elastic/agent/server/test/api_test.py index 11776324ed7f..dd96f9b6dfb0 100644 --- a/test/distributed/elastic/agent/server/test/api_test.py +++ b/test/distributed/elastic/agent/server/test/api_test.py @@ -633,7 +633,7 @@ class SimpleElasticAgentTest(unittest.TestCase): worker_group = agent.get_worker_group() num_restarts = 3 - for _ in range(0, num_restarts): + for _ in range(num_restarts): agent._restart_workers(worker_group) self.assertEqual(WorkerState.HEALTHY, worker_group.state) diff --git a/test/distributed/elastic/multiprocessing/api_test.py b/test/distributed/elastic/multiprocessing/api_test.py index 4ac0dcacb4b8..19d941e0d9c6 100644 --- a/test/distributed/elastic/multiprocessing/api_test.py +++ b/test/distributed/elastic/multiprocessing/api_test.py @@ -146,7 +146,7 @@ def echo_large(size: int) -> dict[int, str]: returns a large output ({0: test0", 1: "test1", ..., (size-1):f"test{size-1}"}) """ out = {} - for idx in range(0, size): + for idx in range(size): out[idx] = f"test{idx}" return out diff --git 
a/test/distributed/elastic/timer/file_based_local_timer_test.py b/test/distributed/elastic/timer/file_based_local_timer_test.py index cf597eb6a37a..0125ce5cd25a 100644 --- a/test/distributed/elastic/timer/file_based_local_timer_test.py +++ b/test/distributed/elastic/timer/file_based_local_timer_test.py @@ -191,7 +191,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64): """ client = timer.FileTimerClient(file_path) sem.release() - for _ in range(0, n): + for _ in range(n): client.acquire("test_scope", 0) time.sleep(interval) diff --git a/test/distributed/elastic/timer/local_timer_example.py b/test/distributed/elastic/timer/local_timer_example.py index 09421f4b38f5..6d438f2536d6 100644 --- a/test/distributed/elastic/timer/local_timer_example.py +++ b/test/distributed/elastic/timer/local_timer_example.py @@ -102,7 +102,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64): world_size = 8 processes = [] - for i in range(0, world_size): + for i in range(world_size): if i % 2 == 0: p = spawn_ctx.Process(target=_stuck_function, args=(i, mp_queue)) else: @@ -110,7 +110,7 @@ if not (IS_WINDOWS or IS_MACOS or IS_ARM64): p.start() processes.append(p) - for i in range(0, world_size): + for i in range(world_size): p = processes[i] p.join() if i % 2 == 0: diff --git a/test/distributed/elastic/timer/local_timer_test.py b/test/distributed/elastic/timer/local_timer_test.py index b65b202d5ec6..8818b1788c62 100644 --- a/test/distributed/elastic/timer/local_timer_test.py +++ b/test/distributed/elastic/timer/local_timer_test.py @@ -127,7 +127,7 @@ if not INVALID_PLATFORMS: interval seconds. Releases the given semaphore once before going to work. """ sem.release() - for i in range(0, n): + for i in range(n): mp_queue.put(TimerRequest(i, "test_scope", 0)) time.sleep(interval) diff --git a/test/distributed/elastic/utils/data/cycling_iterator_test.py b/test/distributed/elastic/utils/data/cycling_iterator_test.py index c9cb055a2c22..835ed6ebbd01 100644 --- a/test/distributed/elastic/utils/data/cycling_iterator_test.py +++ b/test/distributed/elastic/utils/data/cycling_iterator_test.py @@ -15,7 +15,7 @@ class CyclingIteratorTest(unittest.TestCase): def generator(self, epoch, stride, max_epochs): # generate an continuously incrementing list each epoch # e.g. [0,1,2] [3,4,5] [6,7,8] ... 
- return iter([stride * epoch + i for i in range(0, stride)]) + return iter([stride * epoch + i for i in range(stride)]) def test_cycling_iterator(self): stride = 3 @@ -25,7 +25,7 @@ class CyclingIteratorTest(unittest.TestCase): return self.generator(epoch, stride, max_epochs) it = CyclingIterator(n=max_epochs, generator_fn=generator_fn) - for i in range(0, stride * max_epochs): + for i in range(stride * max_epochs): self.assertEqual(i, next(it)) with self.assertRaises(StopIteration): diff --git a/test/distributed/fsdp/test_fsdp_hybrid_shard.py b/test/distributed/fsdp/test_fsdp_hybrid_shard.py index 26a05bbc4171..e2ea4c5fc9af 100644 --- a/test/distributed/fsdp/test_fsdp_hybrid_shard.py +++ b/test/distributed/fsdp/test_fsdp_hybrid_shard.py @@ -124,7 +124,7 @@ class TestFSDPHybridShard(FSDPTest): model = MyModel().to(device_type) num_node_devices = torch.accelerator.device_count() shard_rank_lists = ( - list(range(0, num_node_devices // 2)), + list(range(num_node_devices // 2)), list(range(num_node_devices // 2, num_node_devices)), ) shard_groups = ( @@ -175,7 +175,7 @@ class TestFSDPHybridShard(FSDPTest): model = MyModel().to(device_type) num_node_devices = torch.accelerator.device_count() shard_rank_lists = ( - list(range(0, num_node_devices // 2)), + list(range(num_node_devices // 2)), list(range(num_node_devices // 2, num_node_devices)), ) shard_groups = ( diff --git a/test/distributed/tensor/test_dtensor_ops.py b/test/distributed/tensor/test_dtensor_ops.py index c4373773d662..df51152a9030 100644 --- a/test/distributed/tensor/test_dtensor_ops.py +++ b/test/distributed/tensor/test_dtensor_ops.py @@ -802,7 +802,7 @@ class TestLocalDTensorOps(TestDTensorOps): self.run_opinfo_test(dtype, op) def test_mean(self): - with LocalTensorMode(frozenset(range(0, self.world_size))): + with LocalTensorMode(frozenset(range(self.world_size))): self.run_mean() def test_one_hot(self): @@ -811,7 +811,7 @@ class TestLocalDTensorOps(TestDTensorOps): def run_opinfo_test( self, dtype, op, requires_grad=True, sample_inputs_filter=lambda s: True ): - with LocalTensorMode(frozenset(range(0, self.world_size))): + with LocalTensorMode(frozenset(range(self.world_size))): super().run_opinfo_test(dtype, op, requires_grad, sample_inputs_filter) def assertEqualOnRank(self, x, y, msg=None, *, rank=0): diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 0ed4651d3ec5..2db674a458ed 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -536,7 +536,7 @@ class DeviceMeshTestNDim(DTensorTestBase): # Create shard groups (e.g. 
(0, 1, 2, 3), (4, 5, 6, 7)) # and assign the correct shard group to each rank shard_rank_lists = ( - list(range(0, self.world_size // 2)), + list(range(self.world_size // 2)), list(range(self.world_size // 2, self.world_size)), ) shard_groups = ( diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py index b588589d81ba..550589002003 100644 --- a/test/distributions/test_distributions.py +++ b/test/distributions/test_distributions.py @@ -5722,11 +5722,11 @@ class TestKL(DistributionsTestCase): def test_kl_multivariate_normal(self): set_rng_seed(0) # see Note [Randomized statistical tests] n = 5 # Number of tests for multivariate_normal - for i in range(0, n): - loc = [torch.randn(4) for _ in range(0, 2)] + for i in range(n): + loc = [torch.randn(4) for _ in range(2)] scale_tril = [ transform_to(constraints.lower_cholesky)(torch.randn(4, 4)) - for _ in range(0, 2) + for _ in range(2) ] p = MultivariateNormal(loc=loc[0], scale_tril=scale_tril[0]) q = MultivariateNormal(loc=loc[1], scale_tril=scale_tril[1]) @@ -5755,10 +5755,10 @@ class TestKL(DistributionsTestCase): def test_kl_multivariate_normal_batched(self): b = 7 # Number of batches - loc = [torch.randn(b, 3) for _ in range(0, 2)] + loc = [torch.randn(b, 3) for _ in range(2)] scale_tril = [ transform_to(constraints.lower_cholesky)(torch.randn(b, 3, 3)) - for _ in range(0, 2) + for _ in range(2) ] expected_kl = torch.stack( [ @@ -5766,7 +5766,7 @@ class TestKL(DistributionsTestCase): MultivariateNormal(loc[0][i], scale_tril=scale_tril[0][i]), MultivariateNormal(loc[1][i], scale_tril=scale_tril[1][i]), ) - for i in range(0, b) + for i in range(b) ] ) actual_kl = kl_divergence( @@ -5777,7 +5777,7 @@ class TestKL(DistributionsTestCase): def test_kl_multivariate_normal_batched_broadcasted(self): b = 7 # Number of batches - loc = [torch.randn(b, 3) for _ in range(0, 2)] + loc = [torch.randn(b, 3) for _ in range(2)] scale_tril = [ transform_to(constraints.lower_cholesky)(torch.randn(b, 3, 3)), transform_to(constraints.lower_cholesky)(torch.randn(3, 3)), @@ -5788,7 +5788,7 @@ class TestKL(DistributionsTestCase): MultivariateNormal(loc[0][i], scale_tril=scale_tril[0][i]), MultivariateNormal(loc[1][i], scale_tril=scale_tril[1]), ) - for i in range(0, b) + for i in range(b) ] ) actual_kl = kl_divergence( @@ -5800,15 +5800,15 @@ class TestKL(DistributionsTestCase): def test_kl_lowrank_multivariate_normal(self): set_rng_seed(0) # see Note [Randomized statistical tests] n = 5 # Number of tests for lowrank_multivariate_normal - for i in range(0, n): - loc = [torch.randn(4) for _ in range(0, 2)] - cov_factor = [torch.randn(4, 3) for _ in range(0, 2)] + for i in range(n): + loc = [torch.randn(4) for _ in range(2)] + cov_factor = [torch.randn(4, 3) for _ in range(2)] cov_diag = [ - transform_to(constraints.positive)(torch.randn(4)) for _ in range(0, 2) + transform_to(constraints.positive)(torch.randn(4)) for _ in range(2) ] covariance_matrix = [ cov_factor[i].matmul(cov_factor[i].t()) + cov_diag[i].diag() - for i in range(0, 2) + for i in range(2) ] p = LowRankMultivariateNormal(loc[0], cov_factor[0], cov_diag[0]) q = LowRankMultivariateNormal(loc[1], cov_factor[1], cov_diag[1]) @@ -5861,10 +5861,10 @@ class TestKL(DistributionsTestCase): def test_kl_lowrank_multivariate_normal_batched(self): b = 7 # Number of batches - loc = [torch.randn(b, 3) for _ in range(0, 2)] - cov_factor = [torch.randn(b, 3, 2) for _ in range(0, 2)] + loc = [torch.randn(b, 3) for _ in range(2)] + cov_factor = [torch.randn(b, 3, 2) for _ in 
range(2)] cov_diag = [ - transform_to(constraints.positive)(torch.randn(b, 3)) for _ in range(0, 2) + transform_to(constraints.positive)(torch.randn(b, 3)) for _ in range(2) ] expected_kl = torch.stack( [ @@ -5876,7 +5876,7 @@ class TestKL(DistributionsTestCase): loc[1][i], cov_factor[1][i], cov_diag[1][i] ), ) - for i in range(0, b) + for i in range(b) ] ) actual_kl = kl_divergence( diff --git a/test/dynamo/test_export.py b/test/dynamo/test_export.py index 112da727ec61..f3f438d241af 100644 --- a/test/dynamo/test_export.py +++ b/test/dynamo/test_export.py @@ -49,9 +49,9 @@ class ExportTests(torch._dynamo.test_case.TestCase): lc_key = state[0] lc_val = state[1] bar = [] - for _ in range(0, 4): + for _ in range(4): bar2 = [] - for _ in range(0, 3): + for _ in range(3): bar2.append( lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 0.1]) ) @@ -665,9 +665,9 @@ def forward(self, x, y): lc_key = state[0] lc_val = state[1] bar = [] - for _ in range(0, 4): + for _ in range(4): bar2 = [] - for _ in range(0, 3): + for _ in range(3): bar2.append( lc_key + lc_val + torch.tensor([0.1, 0.25, 0.4, 0.5, 0.1]) ) diff --git a/test/dynamo/test_functions.py b/test/dynamo/test_functions.py index d16676cda8ee..647033e63e4c 100644 --- a/test/dynamo/test_functions.py +++ b/test/dynamo/test_functions.py @@ -3627,7 +3627,7 @@ class GraphModule(torch.nn.Module): ) test(range(10), slice(1, 10, 2), expected=range(1, 10, 2)) - test(range(10), slice(None, 10, None), expected=range(0, 10)) + test(range(10), slice(None, 10, None), expected=range(10)) test(range(10), slice(-1, 7, None), expected=range(9, 7)) test(range(10), slice(-1, 7, 2), expected=range(9, 7, 2)) test(range(1, 10, 2), slice(3, 7, 2), expected=range(7, 11, 4)) diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py index 7cac7eca7239..c251ce28bac4 100644 --- a/test/dynamo/test_modules.py +++ b/test/dynamo/test_modules.py @@ -3047,7 +3047,7 @@ class OptimizedModuleTest(torch._dynamo.test_case.TestCase): def generate(x, c): return mod(x) + c - for _ in range(0, 10): + for _ in range(10): generate(torch.randn(10, 10), 0) generate(torch.randn(10, 10), 1) self.assertEqual(cnt.frame_count, 2) diff --git a/test/dynamo/test_repros.py b/test/dynamo/test_repros.py index 362a541918c3..ac0515ac6ba8 100644 --- a/test/dynamo/test_repros.py +++ b/test/dynamo/test_repros.py @@ -4471,7 +4471,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): compiled_fn = torch.compile(func, backend=cnt, fullgraph=True) requires_grad = func is not func1 - for _ in range(0, 5): + for _ in range(5): # Inputs eager_a = torch.ones([6], requires_grad=requires_grad) compiled_a = torch.ones([6], requires_grad=requires_grad) @@ -4623,7 +4623,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): x = torch.rand([2, 2]) self.assertEqual(opt_fn(x, counter), fn(x, counter)) self.assertEqual(counter[0], 2) - for _ in range(0, 10): + for _ in range(10): opt_fn(x, counter) self.assertEqual(counter[0], 12) if torch._dynamo.config.assume_static_by_default: @@ -4784,7 +4784,7 @@ class ReproTests(torch._dynamo.test_case.TestCase): def test_contains_range_constprop(self): def fn(x): # dynamo should const prop to False - if 3 in range(0, 10): + if 3 in range(10): return x + 1 else: return x + 2 diff --git a/test/functorch/test_ac.py b/test/functorch/test_ac.py index fde84b6683ed..d0611f19cf2a 100644 --- a/test/functorch/test_ac.py +++ b/test/functorch/test_ac.py @@ -106,7 +106,7 @@ class MemoryBudgetTest(TestCase): return f(x, ws) _, eager_flops = get_mem_and_flops(call) - for budget 
in range(0, 11): + for budget in range(11): mem, flops = get_mem_and_flops(call, memory_budget=budget / 10) if budget <= 5: # We start saving the matmuls @@ -251,7 +251,7 @@ class MemoryBudgetTest(TestCase): return f(x, ws) expected = call() - for budget in range(0, 11): + for budget in range(11): memory_budget = budget / 10 torch._dynamo.reset() with config.patch(activation_memory_budget=memory_budget): diff --git a/test/inductor/test_codecache.py b/test/inductor/test_codecache.py index 78c2dd3de852..ca2e9007109d 100644 --- a/test/inductor/test_codecache.py +++ b/test/inductor/test_codecache.py @@ -1146,7 +1146,7 @@ class TestFxGraphCache(TestCase): raise unittest.SkipTest(f"requires {GPU_TYPE}") def fn1(x): - return x + torch.tensor(list(range(0, 12)), device=device) + return x + torch.tensor(list(range(12)), device=device) def fn2(x): return x + torch.tensor(list(range(1, 13)), device=device) diff --git a/test/inductor/test_compiled_autograd.py b/test/inductor/test_compiled_autograd.py index 2612af01f6ff..716d3bfafee2 100644 --- a/test/inductor/test_compiled_autograd.py +++ b/test/inductor/test_compiled_autograd.py @@ -1599,7 +1599,7 @@ main() eager_check() - for i in range(0, 5): + for i in range(5): with compiled_autograd._enable(compiler_fn): eager_check() diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index 6645f17fb9ee..85405283e4bd 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -2095,7 +2095,7 @@ class TestMaxAutotune(TestCase): # Test loop. def test_func2(x): - for i in range(0, 10): + for i in range(10): x = torch.matmul(x, x) return x diff --git a/test/inductor/test_triton_kernels.py b/test/inductor/test_triton_kernels.py index 9a21220ce4d9..4739d00f1f4a 100644 --- a/test/inductor/test_triton_kernels.py +++ b/test/inductor/test_triton_kernels.py @@ -3005,7 +3005,7 @@ class MutationTests(torch._inductor.test_case.TestCase): mask = offsets < n_elements x = tl.load(in_ptr0 + offsets, mask=mask) y = tl.load(in_ptr1 + offsets, mask=mask) - for i in range(0, BLOCK_SIZE): + for i in range(BLOCK_SIZE): i = tl.multiple_of(i, 1) output = x + y tl.store(out_ptr + offsets, output, mask=mask) @@ -3160,7 +3160,7 @@ class MutationTests(torch._inductor.test_case.TestCase): x = tl.load(x_block_ptr) # Compute gating - for c2 in range(0, tl.cdiv(C2, BLOCK_SIZE_C2)): + for c2 in range(tl.cdiv(C2, BLOCK_SIZE_C2)): # Compute block pointers offs_c2 = c2 * BLOCK_SIZE_C2 + tl.arange(0, BLOCK_SIZE_C2) o_block_ptr = O_ptr + offs_m[:, None] * C2 + offs_c2[None, :] diff --git a/test/jit/xnnpack/test_xnnpack_delegate.py b/test/jit/xnnpack/test_xnnpack_delegate.py index b97765ed5bb0..f6c7832d5b28 100644 --- a/test/jit/xnnpack/test_xnnpack_delegate.py +++ b/test/jit/xnnpack/test_xnnpack_delegate.py @@ -32,7 +32,7 @@ class TestXNNPackBackend(unittest.TestCase): }, ) - for _ in range(0, 20): + for _ in range(20): sample_input = torch.randn(4, 4, 4) actual_output = scripted_module(sample_input) expected_output = lowered_module(sample_input) diff --git a/test/nn/test_convolution.py b/test/nn/test_convolution.py index 4cdcac707644..3c3b3f53e528 100644 --- a/test/nn/test_convolution.py +++ b/test/nn/test_convolution.py @@ -1292,7 +1292,7 @@ class TestConvolutionNN(NNTestCase): kernel_x = torch.zeros([3, 1, 1, radius * 2 + 1], device=image.device) image = torch.nn.functional.conv2d(image, kernel_x, groups=image.shape[-3]) - for i in range(0, 128): + for i in range(128): # This should not fail reproducer(radius=i) diff --git 
a/test/nn/test_embedding.py b/test/nn/test_embedding.py index fb9d842ce476..f21184290fa1 100644 --- a/test/nn/test_embedding.py +++ b/test/nn/test_embedding.py @@ -551,7 +551,7 @@ class TestEmbeddingNNDeviceType(NNTestCase): # Pull out the bag's indices from indices_1D, and fill any # remaining space with padding indices indices_in_bag = [] - for item_pos in range(0, max_indices_per_bag): + for item_pos in range(max_indices_per_bag): if (start + item_pos) < end: indices_in_bag.append(indices_1D[start + item_pos]) else: diff --git a/test/nn/test_multihead_attention.py b/test/nn/test_multihead_attention.py index 0c04e3b86b88..3dc6a586ced6 100644 --- a/test/nn/test_multihead_attention.py +++ b/test/nn/test_multihead_attention.py @@ -485,7 +485,7 @@ class TestMultiheadAttentionNN(NNTestCase): )[0] output_3d = output_3d.transpose(0, 1) # [N, T, D] - for i in range(0, batch_size): + for i in range(batch_size): output_2d = mta_model( query[i].unsqueeze(0).transpose(0, 1), key[i].unsqueeze(0).transpose(0, 1), diff --git a/test/nn/test_pooling.py b/test/nn/test_pooling.py index d282a885f4ed..c3a7b829b2b1 100644 --- a/test/nn/test_pooling.py +++ b/test/nn/test_pooling.py @@ -1135,7 +1135,7 @@ torch.cuda.synchronize() for size, kernel_size, stride, dilation, ceil_mode in itertools.product( sizes, kernel_sizes, strides, dilations, ceil_modes ): - padding = random.sample(range(0, math.floor(kernel_size / 2) + 1), 1) + padding = random.sample(range(math.floor(kernel_size / 2) + 1), 1) check( torch.randn(size, device=device, dtype=dtype), kernel_size, diff --git a/test/onnx/test_onnx_opset.py b/test/onnx/test_onnx_opset.py index 75de1f3fab83..16ca93dbfe2c 100644 --- a/test/onnx/test_onnx_opset.py +++ b/test/onnx/test_onnx_opset.py @@ -36,12 +36,12 @@ def check_onnx_opset_operator( # but the op's attributes can optionally be # specified as well assert len(ops) == len(graph.node) - for i in range(0, len(ops)): + for i in range(len(ops)): assert graph.node[i].op_type == ops[i]["op_name"] if "attributes" in ops[i]: attributes = ops[i]["attributes"] assert len(attributes) == len(graph.node[i].attribute) - for j in range(0, len(attributes)): + for j in range(len(attributes)): for attribute_field in attributes[j].keys(): assert attributes[j][attribute_field] == getattr( graph.node[i].attribute[j], attribute_field diff --git a/test/optim/test_lrscheduler.py b/test/optim/test_lrscheduler.py index cea85b07646f..3e65720a45b6 100644 --- a/test/optim/test_lrscheduler.py +++ b/test/optim/test_lrscheduler.py @@ -1509,7 +1509,7 @@ class TestLRScheduler(TestCase): 14.0 / 3, 29.0 / 6, ] - deltas = [2 * i for i in range(0, 2)] + deltas = [2 * i for i in range(2)] base_lrs = [1 + delta for delta in deltas] max_lrs = [5 + delta for delta in deltas] lr_targets = [[x + delta for x in lr_base_target] for delta in deltas] diff --git a/test/profiler/test_profiler.py b/test/profiler/test_profiler.py index 1461731a5998..a9321da3fbd3 100644 --- a/test/profiler/test_profiler.py +++ b/test/profiler/test_profiler.py @@ -1930,7 +1930,7 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters event_list.table() def _check_all_gpu_present(self, gpu_dict, max_gpu_count): - for i in range(0, max_gpu_count): + for i in range(max_gpu_count): self.assertEqual(gpu_dict["GPU " + str(i)], 1) # Do json sanity testing. 
Checks that all events are between profiler start and end @@ -2139,8 +2139,8 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters step_helper_funcs.append(event) self.assertEqual(len(prof_steps), 5) self.assertEqual(len(step_helper_funcs), 5) - for i in range(0, len(step_helper_funcs)): - for j in range(0, len(step_helper_funcs)): + for i in range(len(step_helper_funcs)): + for j in range(len(step_helper_funcs)): self.assertTrue( not self._partial_overlap(prof_steps[i], step_helper_funcs[j]) ) diff --git a/test/quantization/core/experimental/test_floatx.py b/test/quantization/core/experimental/test_floatx.py index ee7fe0a9d186..c4cea4073a5c 100644 --- a/test/quantization/core/experimental/test_floatx.py +++ b/test/quantization/core/experimental/test_floatx.py @@ -275,7 +275,7 @@ class TestFloat8Dtype(TestCase): IMO simpler to special case e8m0 here. """ - for biased_exponent in range(0, 256): + for biased_exponent in range(256): # iterate through all the possible options of guard, round, sticky bits # for the current exponent for grs in range(8): diff --git a/test/test_dataloader.py b/test/test_dataloader.py index da0c12082244..b9000a2c68d3 100644 --- a/test/test_dataloader.py +++ b/test/test_dataloader.py @@ -3494,7 +3494,7 @@ class TestIndividualWorkerQueue(TestCase): max_num_workers = 1 for batch_size in (8, 16, 32, 64): - for num_workers in range(0, min(6, max_num_workers)): + for num_workers in range(min(6, max_num_workers)): self._run_ind_worker_queue_test( batch_size=batch_size, num_workers=num_workers + 1 ) diff --git a/test/test_datapipe.py b/test/test_datapipe.py index e92fa2b0615d..2790145665b1 100644 --- a/test/test_datapipe.py +++ b/test/test_datapipe.py @@ -520,7 +520,7 @@ class TestIterableDataPipeBasic(TestCase): self.assertEqual(list(range(9)), list(n)) # Functional Test: Uneven DataPipes - source_numbers = list(range(0, 10)) + [10, 12] + source_numbers = list(range(10)) + [10, 12] numbers_dp = dp.iter.IterableWrapper(source_numbers) n1, n2 = numbers_dp.demux(2, lambda x: x % 2) self.assertEqual([0, 2, 4, 6, 8, 10, 12], list(n1)) @@ -1257,7 +1257,7 @@ class TestFunctionalIterDataPipe(TestCase): ) output1, output2 = list(dp1), list(dp2) self.assertEqual(list(range(5, 10)), output1) - self.assertEqual(list(range(0, 5)), output2) + self.assertEqual(list(range(5)), output2) # Functional Test: values of the same classification are lumped together, and unlimited buffer with warnings.catch_warnings(record=True) as wa: @@ -1271,7 +1271,7 @@ class TestFunctionalIterDataPipe(TestCase): self.assertRegex(str(wa[-1].message), r"Unlimited buffer size is set") output1, output2 = list(dp1), list(dp2) self.assertEqual(list(range(5, 10)), output1) - self.assertEqual(list(range(0, 5)), output2) + self.assertEqual(list(range(5)), output2) # Functional Test: classifier returns a value outside of [0, num_instance - 1] dp0 = input_dp.demux(num_instances=1, classifier_fn=lambda x: x % 2) diff --git a/test/test_dynamic_shapes.py b/test/test_dynamic_shapes.py index fcc45521fbb1..9a6575cf184d 100644 --- a/test/test_dynamic_shapes.py +++ b/test/test_dynamic_shapes.py @@ -1385,7 +1385,7 @@ class f(torch.nn.Module): self.assertEqual(x.storage_offset(), y.storage_offset()) def test_tensor_factory_with_symint(self): - args = list(range(0, 3)) + args = list(range(3)) expected = torch.tensor(args) shape_env = ShapeEnv() @@ -4291,7 +4291,7 @@ def forward(self, arg0_1: "i64[1][1]cpu", arg1_1: "Sym(u1)", arg2_1: "i64[u1][1] start = start.item() N = 3 result = X0[start] - for i in range(0, 
N): + for i in range(N): result += X0[start + 1 + i] return result diff --git a/test/test_indexing.py b/test/test_indexing.py index fa91b5903410..99d84a65abca 100644 --- a/test/test_indexing.py +++ b/test/test_indexing.py @@ -902,7 +902,7 @@ class TestIndexing(TestCase): # Set window size W = 10 # Generate a list of lists, containing overlapping window indices - indices = [range(i, i + W) for i in range(0, N - W)] + indices = [range(i, i + W) for i in range(N - W)] for i in [len(indices), 100, 32]: windowed_data = t[indices[:i]] diff --git a/test/test_jit.py b/test/test_jit.py index 6a3c968f86dd..613903e9a116 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -3153,7 +3153,7 @@ class TestScript(JitTestCase): eplan = get_execution_plan(dstate) num_bailouts = eplan.code.num_bailouts() - for i in range(0, num_bailouts): + for i in range(num_bailouts): eplan.code.request_bailout(i) self.assertEqual(jitted(x), expected) @@ -5950,7 +5950,7 @@ a") # type: (int) -> int prev = 1 v = 1 - for i in range(0, x): + for i in range(x): save = v v = v + prev prev = save @@ -10938,7 +10938,7 @@ dedent """ # Test symbolic differentiation # Run Forward and Backward thrice to trigger autodiff graph - for i in range(0, 3): + for i in range(3): y = jit_module(x) y.backward(grad) x.grad.zero_() @@ -11802,7 +11802,7 @@ dedent """ def fn_zip_enumerate(x, y): # type: (List[int], List[int]) -> int sum = 0 - for (i, (j, v), k) in zip(x, enumerate(y), range(0, 100)): + for (i, (j, v), k) in zip(x, enumerate(y), range(100)): sum += i * j * v * k return sum diff --git a/test/test_jit_fuser_te.py b/test/test_jit_fuser_te.py index 1bda41f7f8f1..dba28f98cbf9 100644 --- a/test/test_jit_fuser_te.py +++ b/test/test_jit_fuser_te.py @@ -243,7 +243,7 @@ class TestTEFuser(JitTestCase): return x2.sum() with texpr_reductions_enabled(): - a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu") + a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu") a = a.reshape(5, 3) scripted = self.checkScript(func, (a,)) self.assertLastGraphAllFused() @@ -259,7 +259,7 @@ class TestTEFuser(JitTestCase): return x.sum((-2,)) * 2 with texpr_reductions_enabled(): - a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu") + a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu") a = a.reshape(5, 3) scripted = self.checkScript(func, (a,)) self.assertLastGraphAllFused() @@ -271,7 +271,7 @@ class TestTEFuser(JitTestCase): return x.sum((0,), keepdim=True, dtype=torch.double) * 2 with texpr_reductions_enabled(): - a = torch.tensor(list(range(0, 15)), dtype=torch.float, device="cpu") + a = torch.tensor(list(range(15)), dtype=torch.float, device="cpu") a = a.reshape(5, 3) self.checkScript(func, (a,)) @@ -2234,7 +2234,7 @@ class TestTEFuser(JitTestCase): indices = [0, 1, 2, 3] sets = [] - for i in range(0, len(indices) + 1): + for i in range(len(indices) + 1): for subset in combinations(indices, i): sets.append(subset) # noqa: PERF402 diff --git a/test/test_matmul_cuda.py b/test/test_matmul_cuda.py index 61f5642830dd..bf46ee0709fc 100644 --- a/test/test_matmul_cuda.py +++ b/test/test_matmul_cuda.py @@ -231,7 +231,7 @@ class TestMatmulCuda(InductorTestCase): def test_cublas_addmm_alignment(self, dtype): device = 'cuda' # perturb X, A, or B alignment - for idx in range(0, 3): + for idx in range(3): for offset in range(1, 3): offsets = [0, 0, 0] offsets[idx] = offset diff --git a/test/test_mps.py b/test/test_mps.py index 7346d1d26d44..e825fa77aa89 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ 
-1900,7 +1900,7 @@ class TestMPS(TestCaseMPS): res_cpu = torch.linalg.vector_norm(B_cpu, ord=3.5) self.assertEqual(res_mps, res_cpu) - for dim in range(0, B_mps.dim()): + for dim in range(B_mps.dim()): res_mps = torch.linalg.vector_norm(B_mps, ord=3.5, dim=dim) res_cpu = torch.linalg.vector_norm(B_cpu, ord=3.5, dim=dim) self.assertEqual(res_mps, res_cpu) @@ -2871,8 +2871,8 @@ class TestMPS(TestCaseMPS): def test_contiguous_slice_2d(self): def helper(shape): - for i in range(0, shape[0]): - for j in range(0, shape[1]): + for i in range(shape[0]): + for j in range(shape[1]): t_mps = torch.randn(shape, device="mps") t_cpu = t_mps.detach().clone().cpu() @@ -3432,12 +3432,12 @@ class TestMPS(TestCaseMPS): elems = torch.arange(n_tensors * n_tensor_elems, dtype=torch.float32) tensor_list = [] - for i in range(0, n_tensors - 1): + for i in range(n_tensors - 1): # create a list of contiguous view tensors (view tensor created by the slice op) t = elems[n_tensor_elems * i : n_tensor_elems * (i + 1)] tensor_list.append(t) - for i in range(0, n_tensors - 1): + for i in range(n_tensors - 1): t = tensor_list[i].view(1, n_tensor_elems) t_mps = t.to("mps") self.assertEqual(t, t_mps.cpu(), f"i={i}") @@ -4942,7 +4942,7 @@ class TestMPS(TestCaseMPS): x_mps = fn(torch.zeros(shape, device="mps"), dim=dim) self.assertEqual(x_cpu, x_mps.cpu()) for fn in [torch.any, torch.all]: - for dim in range(0, 4): + for dim in range(4): helper(fn, dim) # 6D tensor reductions @@ -9750,7 +9750,7 @@ class TestGatherScatter(TestCaseMPS): self.assertEqual(x_cpu, x_mps) def test_cast_gather_scatter(self): - for _ in range(0, 50): + for _ in range(50): input = np.random.randint(0, 255, size=(5, 5, 4), dtype=np.uint8) with torch.no_grad(): s = torch.tensor(input, dtype=torch.uint8, device="mps").unsqueeze(0) diff --git a/test/test_numa_binding.py b/test/test_numa_binding.py index 764156ff9b98..c599587e281d 100644 --- a/test/test_numa_binding.py +++ b/test/test_numa_binding.py @@ -549,7 +549,7 @@ class NumaBindingTest(TestCase): bound_logical_cpu_indices_0, # Gets an extra physical core due to odd number of physical cores on numa node # 3 physical cores total, 2 GPUs: GPU 0 gets 2 physical cores (CPUs 0-3) - set(range(0, 4)), + set(range(4)), ) bound_logical_cpu_indices_1 = ( @@ -677,7 +677,7 @@ class NumaBindingTest(TestCase): # 1 numa node, 2 L3 caches, 1 physical core per L3 cache = 2 logical CPUs per cache # L3 cache 0: CPUs 0-1, L3 cache 1: CPUs 2-3 # Both have same number of CPUs, so prefer lower cache key (0) - set(range(0, 2)), + set(range(2)), ) def test_binds_to_node_0_if_node_stored_as_minus_one(self) -> None: @@ -709,7 +709,7 @@ class NumaBindingTest(TestCase): # GPU 0 has numa node stored as -1, which is treated as numa node 0 # Each numa node has 1 * 1 * 2 = 2 logical CPUs # Numa node 0 has CPUs 0-1 - set(range(0, 2)), + set(range(2)), ) def test_callable_entrypoint_basic(self) -> None: diff --git a/test/test_reductions.py b/test/test_reductions.py index e4fa54491dd0..4a3235fbc50c 100644 --- a/test/test_reductions.py +++ b/test/test_reductions.py @@ -1710,7 +1710,7 @@ class TestReductions(TestCase): with_extremal=False, atol=None, rtol=None, exact_dtype=True, with_keepdim=False): # Test 0-d to 3-d tensors. 
- for ndims in range(0, 4): + for ndims in range(4): shape = _rand_shape(ndims, min_size=5, max_size=10) for n in range(ndims + 1): for c in combinations(list(range(ndims)), n): @@ -2623,7 +2623,7 @@ class TestReductions(TestCase): # Generate some random test cases ops = ['quantile', 'nanquantile'] inputs = [tuple(np.random.randint(2, 10, size=i)) for i in range(1, 4)] - quantiles = [tuple(np.random.rand(i)) for i in range(0, 5)] + quantiles = [tuple(np.random.rand(i)) for i in range(5)] keepdims = [True, False] # Add corner cases diff --git a/test/test_serialization.py b/test/test_serialization.py index 7c4208b6a0d6..a6e3ef23580d 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -295,7 +295,7 @@ class SerializationMixin: 5, 6 ] - for i in range(0, 100): + for i in range(100): data.append(0) t = torch.tensor(data, dtype=torch.uint8) diff --git a/test/test_sparse.py b/test/test_sparse.py index 866f38a316d7..196506a8e13d 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -5300,7 +5300,7 @@ class TestSparseAny(TestCase): x_dense = torch.eye(dense_dim, dtype=dtype, device=device) for sparse_dim_in in range(1, dense_dim): x_sparse = x_dense.to_sparse(sparse_dim_in) - for sparse_dim_out in range(0, dense_dim): + for sparse_dim_out in range(dense_dim): if sparse_dim_out == sparse_dim_in: self.assertTrue(x_sparse.to_sparse(sparse_dim_out).sparse_dim() == sparse_dim_out) else: diff --git a/test/test_sparse_csr.py b/test/test_sparse_csr.py index 65e800f6eba1..45748c683621 100644 --- a/test/test_sparse_csr.py +++ b/test/test_sparse_csr.py @@ -135,7 +135,7 @@ class TestSparseCSRSampler(TestCase): index_dtype = torch.int32 for n_rows in range(1, 10): for n_cols in range(1, 10): - for nnz in range(0, n_rows * n_cols + 1): + for nnz in range(n_rows * n_cols + 1): crow_indices = self._make_crow_indices( n_rows, n_cols, nnz, device=device, dtype=index_dtype) diff --git a/test/test_static_runtime.py b/test/test_static_runtime.py index 893aea8e3130..df1e0c3e34fa 100644 --- a/test/test_static_runtime.py +++ b/test/test_static_runtime.py @@ -60,7 +60,7 @@ class MultiHeadAttentionLayer(nn.Module): # Taken from https://github.com/facebookresearch/dlrm/blob/master/dlrm_s_pytorch.py def create_mlp(ln, sigmoid_layer): layers = nn.ModuleList() - for i in range(0, len(ln) - 1): + for i in range(len(ln) - 1): n = ln[i] m = ln[i + 1] diff --git a/test/test_tensorboard.py b/test/test_tensorboard.py index cd527db88441..8ff6913887c8 100644 --- a/test/test_tensorboard.py +++ b/test/test_tensorboard.py @@ -200,7 +200,7 @@ class TestTensorBoardPyTorchNumpy(BaseTestCase): bucket_counts=counts.tolist(), ) - ints = torch.tensor(range(0, 100)).float() + ints = torch.tensor(range(100)).float() nbins = 100 counts = torch.histc(ints, bins=nbins, min=0, max=99) limits = torch.tensor(range(nbins)) diff --git a/test/test_tensorexpr.py b/test/test_tensorexpr.py index 17d3a58535d6..57be409ab6b4 100644 --- a/test/test_tensorexpr.py +++ b/test/test_tensorexpr.py @@ -1216,7 +1216,7 @@ class TestTensorExprFuser(BaseTestClass): @torch.jit.script def test(x: torch.Tensor, y: torch.Tensor, z: int) -> torch.Tensor: b = y - for i in range(0, z): + for i in range(z): a = x + y b = b + y return b diff --git a/test/test_torch.py b/test/test_torch.py index 05ea6ea61db1..9b28b801348a 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -8424,7 +8424,7 @@ tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], def test_Size_iter(self): for sizes in [iter([1, 2, 3, 4, 5]), range(1, 6)]: x = 
torch.Size(sizes) - for i in range(0, 5): + for i in range(5): self.assertEqual(x[i], i + 1) def test_t_not_2d_error(self): diff --git a/test/test_view_ops.py b/test/test_view_ops.py index 5bec225787cc..174632b07988 100644 --- a/test/test_view_ops.py +++ b/test/test_view_ops.py @@ -1559,7 +1559,7 @@ class TestOldViewOps(TestCase): self.compare_with_numpy(torch_fn, np_fn, x, device=None, dtype=None) def _test_atleast_dim(self, torch_fn, np_fn, device, dtype): - for ndims in range(0, 5): + for ndims in range(5): shape = _rand_shape(ndims, min_size=5, max_size=10) for _ in range(ndims + 1): for with_extremal in [False, True]: diff --git a/test/test_xnnpack_integration.py b/test/test_xnnpack_integration.py index 481bd3c76a50..62e257790fd4 100644 --- a/test/test_xnnpack_integration.py +++ b/test/test_xnnpack_integration.py @@ -1316,7 +1316,7 @@ class TestXNNPACKConv1dTransformPass(TestCase): groups_list = range(1, 3) kernel_list = range(1, 4) stride_list = range(1, 3) - padding_list = range(0, 3) + padding_list = range(3) dilation_list = range(1, 3) for hparams in itertools.product( @@ -1401,7 +1401,7 @@ class TestXNNPACKConv1dTransformPass(TestCase): groups_list = range(1, 3) kernel_list = range(1, 4) stride_list = range(1, 3) - padding_list = range(0, 3) + padding_list = range(3) dilation_list = range(1, 3) output_features_list = range(1, 3) diff --git a/torch/_decomp/decompositions_for_jvp.py b/torch/_decomp/decompositions_for_jvp.py index e11540e0c2ba..fb4a4d85faa2 100644 --- a/torch/_decomp/decompositions_for_jvp.py +++ b/torch/_decomp/decompositions_for_jvp.py @@ -147,7 +147,7 @@ def native_layer_norm_backward( inner_dims = input_shape[axis:] outer_dims = input_shape[:axis] inner_dim_indices = list(range(axis, input_ndim)) - outer_dim_indices = list(range(0, axis)) + outer_dim_indices = list(range(axis)) N = 1 for i in inner_dims: diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 036f1ba7d01a..451776ef25fd 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -1248,7 +1248,7 @@ def argument_names( # signature. Assign names as {varargs}_0, {varargs}_1, ... assert fullargspec.varargs is not None, "More arguments than expected" input_strs += [ - f"{fullargspec.varargs}_{i}" for i in range(0, len(args) - len(input_strs)) + f"{fullargspec.varargs}_{i}" for i in range(len(args) - len(input_strs)) ] elif len(args) < len(fullargspec.args): # 3. 
If there are fewer arguments in `args` than `fullargspec.args`, @@ -1538,7 +1538,7 @@ class FlattenInputOutputSignature(torch.fx.Transformer): } self.new_args = [] - for i in range(0, len(flat_args)): + for i in range(len(flat_args)): arg = super().placeholder(f"arg{i}", (), {}) if i in matched_input_elements_to_fake: arg.node.meta["val"] = matched_input_elements_to_fake[i] diff --git a/torch/_inductor/dependencies.py b/torch/_inductor/dependencies.py index 0547b6b1db90..b431972521da 100644 --- a/torch/_inductor/dependencies.py +++ b/torch/_inductor/dependencies.py @@ -151,7 +151,7 @@ class MemoryDep(Dep): stride_to_index = {s: i for i, s in enumerate(self_strides)} order = [stride_to_index[s] for s in other_strides] - assert OrderedSet(order) == OrderedSet(range(0, self.num_vars)) + assert OrderedSet(order) == OrderedSet(range(self.num_vars)) return order def get_offset(self) -> sympy.Expr: diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py index e89be2299434..1ad443ff387e 100644 --- a/torch/_meta_registrations.py +++ b/torch/_meta_registrations.py @@ -1787,7 +1787,7 @@ def _padding_check_valid_input(input, padding, *, dim): for d in range(1, input_dim): valid_batch_mode = valid_batch_mode and input.size(d) != 0 else: - for d in range(0, input_dim): + for d in range(input_dim): valid_non_batch_mode = valid_non_batch_mode and input.size(d) != 0 # allow empty batch size but not other dimensions. diff --git a/torch/_numpy/_funcs_impl.py b/torch/_numpy/_funcs_impl.py index 4ab3b29d34b8..f57e7fb001fb 100644 --- a/torch/_numpy/_funcs_impl.py +++ b/torch/_numpy/_funcs_impl.py @@ -1449,7 +1449,7 @@ def rollaxis(a: ArrayLike, axis, start=0): # numpy returns a view, here we try returning the tensor itself # return tensor[...] return a - axes = list(range(0, n)) + axes = list(range(n)) axes.remove(axis) axes.insert(start, axis) return a.view(axes) diff --git a/torch/_refs/__init__.py b/torch/_refs/__init__.py index 13d6efd4ac67..822f949d536f 100644 --- a/torch/_refs/__init__.py +++ b/torch/_refs/__init__.py @@ -4738,7 +4738,7 @@ def transpose(a: TensorLikeType, dim0: int, dim1: int) -> TensorLikeType: if a.ndim <= 1 or dim0 == dim1: return aten.alias.default(a) - _permutation = list(range(0, a.ndim)) + _permutation = list(range(a.ndim)) _permutation[_dim0] = _dim1 _permutation[_dim1] = _dim0 return torch.permute(a, _permutation) diff --git a/torch/_tensor_str.py b/torch/_tensor_str.py index af4deb471db2..86a745f09b44 100644 --- a/torch/_tensor_str.py +++ b/torch/_tensor_str.py @@ -307,7 +307,7 @@ def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=N _tensor_str_with_formatter( self[i], indent + 1, summarize, formatter1, formatter2 ) - for i in range(0, PRINT_OPTS.edgeitems) + for i in range(PRINT_OPTS.edgeitems) ] + ["..."] + [ @@ -322,7 +322,7 @@ def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=N _tensor_str_with_formatter( self[i], indent + 1, summarize, formatter1, formatter2 ) - for i in range(0, self.size(0)) + for i in range(self.size(0)) ] tensor_str = ("," + "\n" * (dim - 1) + " " * (indent + 1)).join(slices) @@ -406,7 +406,7 @@ def get_summarized_data(self): if not PRINT_OPTS.edgeitems: return self.new_empty([0] * self.dim()) elif self.size(0) > 2 * PRINT_OPTS.edgeitems: - start = [self[i] for i in range(0, PRINT_OPTS.edgeitems)] + start = [self[i] for i in range(PRINT_OPTS.edgeitems)] end = [self[i] for i in range(len(self) - PRINT_OPTS.edgeitems, len(self))] return torch.stack([get_summarized_data(x) for x in 
(start + end)]) else: diff --git a/torch/ao/ns/fx/pattern_utils.py b/torch/ao/ns/fx/pattern_utils.py index 242d1740d91b..8339ce8f57c1 100644 --- a/torch/ao/ns/fx/pattern_utils.py +++ b/torch/ao/ns/fx/pattern_utils.py @@ -28,7 +28,7 @@ def get_type_a_related_to_b( for s in base_name_to_sets_of_related_ops.values(): s_list = list(s) # add every bidirectional pair - for idx_0 in range(0, len(s_list)): + for idx_0 in range(len(s_list)): for idx_1 in range(idx_0, len(s_list)): type_a_related_to_b.add((s_list[idx_0], s_list[idx_1])) type_a_related_to_b.add((s_list[idx_1], s_list[idx_0])) diff --git a/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py b/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py index ef6a35686c7d..4330b0e24253 100644 --- a/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py +++ b/torch/ao/pruning/_experimental/activation_sparsifier/activation_sparsifier.py @@ -158,9 +158,9 @@ class ActivationSparsifier: # data should be a list [aggregated over each feature only] if data is None: out_data = [ - 0 for _ in range(0, len(features)) + 0 for _ in range(len(features)) ] # create one in case of 1st forward - self.state[name]["mask"] = [0 for _ in range(0, len(features))] + self.state[name]["mask"] = [0 for _ in range(len(features))] else: out_data = data # a list @@ -336,7 +336,7 @@ class ActivationSparsifier: return input_data * mask else: # apply per feature, feature_dim - for feature_idx in range(0, len(features)): + for feature_idx in range(len(features)): feature = ( torch.Tensor([features[feature_idx]]) .long() diff --git a/torch/ao/pruning/_experimental/data_sparsifier/benchmarks/evaluate_disk_savings.py b/torch/ao/pruning/_experimental/data_sparsifier/benchmarks/evaluate_disk_savings.py index 8192b617139b..0e25f59cea64 100644 --- a/torch/ao/pruning/_experimental/data_sparsifier/benchmarks/evaluate_disk_savings.py +++ b/torch/ao/pruning/_experimental/data_sparsifier/benchmarks/evaluate_disk_savings.py @@ -99,7 +99,7 @@ def sparsify_model(path_to_model, sparsified_model_dump_path): sparse_block_shapes (List of tuples) List of sparse block shapes to be sparsified on """ - sparsity_levels = [sl / 10 for sl in range(0, 10)] + sparsity_levels = [sl / 10 for sl in range(10)] sparsity_levels += [0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0] norms = ["L1", "L2"] diff --git a/torch/ao/pruning/_experimental/data_sparsifier/lightning/tests/test_callbacks.py b/torch/ao/pruning/_experimental/data_sparsifier/lightning/tests/test_callbacks.py index 442639be9b21..5a36e13c7b46 100644 --- a/torch/ao/pruning/_experimental/data_sparsifier/lightning/tests/test_callbacks.py +++ b/torch/ao/pruning/_experimental/data_sparsifier/lightning/tests/test_callbacks.py @@ -299,7 +299,7 @@ class TestTrainingAwareCallback(TestCase): self._check_on_train_start(pl_module, callback, sparsifier_args, scheduler_args) num_epochs = 5 - for _ in range(0, num_epochs): + for _ in range(num_epochs): self._check_on_train_epoch_start(pl_module, callback) self._simulate_update_param_model(pl_module) self._check_on_train_epoch_end(pl_module, callback) diff --git a/torch/ao/pruning/sparsifier/nearly_diagonal_sparsifier.py b/torch/ao/pruning/sparsifier/nearly_diagonal_sparsifier.py index a4d42ea80328..26fb3a98b8fb 100644 --- a/torch/ao/pruning/sparsifier/nearly_diagonal_sparsifier.py +++ b/torch/ao/pruning/sparsifier/nearly_diagonal_sparsifier.py @@ -53,7 +53,7 @@ class NearlyDiagonalSparsifier(base_sparsifier.BaseSparsifier): 
"nearliness cannot be larger than the dimensions of tensor." ) - for row in range(0, height): + for row in range(height): # Bounds of entries that needs to be set to 1 low = max(0, row - dist_to_diagonal) high = min(width, row + dist_to_diagonal + 1) diff --git a/torch/ao/quantization/experimental/observer.py b/torch/ao/quantization/experimental/observer.py index 7d9432ab27ec..e61fcb67c94a 100644 --- a/torch/ao/quantization/experimental/observer.py +++ b/torch/ao/quantization/experimental/observer.py @@ -68,10 +68,10 @@ class APoTObserver(ObserverBase): p_all = [] # create levels - for i in range(0, self.n): + for i in range(self.n): p_curr = torch.tensor([0]) - for j in range(0, (2**self.k - 2) + 1): + for j in range((2**self.k - 2) + 1): curr_ele = 2 ** (-(i + j * self.n)) p_append = torch.tensor([curr_ele]) p_curr = torch.cat((p_curr, p_append)) diff --git a/torch/ao/quantization/fx/_decomposed.py b/torch/ao/quantization/fx/_decomposed.py index 160e9aa3afef..b145cbfaeeba 100644 --- a/torch/ao/quantization/fx/_decomposed.py +++ b/torch/ao/quantization/fx/_decomposed.py @@ -1159,7 +1159,7 @@ class FakeQuantPerChannel(torch.autograd.Function): f"Expecting input to have dtype torch.float32, but got dtype: {input.dtype}" ) assert axis < input.dim(), f"Expecting axis to be < {input.dim()}" - broadcast_dims = list(range(0, axis)) + list(range(axis + 1, input.ndim)) + broadcast_dims = list(range(axis)) + list(range(axis + 1, input.ndim)) unsqueeze_scales = _unsqueeze_multiple(scales, broadcast_dims) unsqueeze_zero_points = _unsqueeze_multiple(zero_points, broadcast_dims) temp = torch.round(input * (1.0 / unsqueeze_scales)) + unsqueeze_zero_points diff --git a/torch/autograd/profiler.py b/torch/autograd/profiler.py index 322d39f72202..cdab6259d85b 100644 --- a/torch/autograd/profiler.py +++ b/torch/autograd/profiler.py @@ -1212,7 +1212,7 @@ class KinetoStepTracker: "Profiler step count has increased more than 1 - " f"current_step = {cls._current_step} step dict = {cls._step_dict}" ) - for _ in range(0, delta): + for _ in range(delta): _kineto_step() cls._current_step = new_step return cls._current_step diff --git a/torch/distributed/_pycute/layout.py b/torch/distributed/_pycute/layout.py index be25cad2e953..04ae5d1fa5fd 100644 --- a/torch/distributed/_pycute/layout.py +++ b/torch/distributed/_pycute/layout.py @@ -162,7 +162,7 @@ def coalesce(layout: Layout, profile: LayoutProfile = None) -> Layout: assert len(layout) >= len(profile) return make_layout( chain( - (coalesce(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type] + (coalesce(layout[i], profile[i]) for i in range(len(profile))), # type: ignore[arg-type] (layout[i] for i in range(len(profile), len(layout))), ) ) @@ -203,7 +203,7 @@ def filter(layout: Layout, profile: LayoutProfile = None) -> Layout: assert len(layout) >= len(profile) return make_layout( chain( - (filter(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type] + (filter(layout[i], profile[i]) for i in range(len(profile))), # type: ignore[arg-type] (layout[i] for i in range(len(profile), len(layout))), ) ) @@ -233,7 +233,7 @@ def composition(layoutA: Layout, layoutB: LayoutInput) -> Layout: assert len(layoutA) >= len(layoutB) return make_layout( chain( - (composition(layoutA[i], layoutB[i]) for i in range(0, len(layoutB))), # type: ignore[arg-type] + (composition(layoutA[i], layoutB[i]) for i in range(len(layoutB))), # type: ignore[arg-type] (layoutA[i] for i in range(len(layoutB), len(layoutA))), ) ) @@ -371,7 +371,7 
            chain(
                (
                    logical_divide(layoutA[i], layoutB[i])  # type: ignore[arg-type]
-                    for i in range(0, len(layoutB))
+                    for i in range(len(layoutB))
                ),
                (layoutA[i] for i in range(len(layoutB), len(layoutA))),
            )
@@ -396,7 +396,7 @@ def logical_product(layoutA: Layout, layoutB: LayoutInput) -> Layout:
            chain(
                (
                    logical_product(layoutA[i], layoutB[i])  # type: ignore[arg-type]
-                    for i in range(0, len(layoutB))
+                    for i in range(len(layoutB))
                ),
                (layoutA[i] for i in range(len(layoutB), len(layoutA))),
            )
@@ -421,14 +421,14 @@ def hier_unzip(
     # A layout with shape ((A,a),(B,b),(C,c))
     split = make_layout(
         hier_unzip(splitter, layoutA[i], layoutB[i])  # type: ignore[arg-type]
-        for i in range(0, len(layoutB))
+        for i in range(len(layoutB))
     )

     # Gather to shape ((A,B,C,...),(a,b,c,...,y,z))
     return make_layout(
-        make_layout(split[i][0] for i in range(0, len(layoutB))),  # type: ignore[arg-type]
+        make_layout(split[i][0] for i in range(len(layoutB))),  # type: ignore[arg-type]
         make_layout(
             chain(  # type: ignore[arg-type]
-                (split[i][1] for i in range(0, len(layoutB))),
+                (split[i][1] for i in range(len(layoutB))),
                 (layoutA[i] for i in range(len(layoutB), len(layoutA))),
             )
         ),
diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py
index 1c576e886fe1..132a40977f85 100644
--- a/torch/distributed/_symmetric_memory/__init__.py
+++ b/torch/distributed/_symmetric_memory/__init__.py
@@ -1671,7 +1671,7 @@ def _low_contention_all_gather(
        local_buf.copy_(tensor)
        # pull
        symm_mem.barrier()
-        for step in range(0, world_size):
+        for step in range(world_size):
            remote_rank = (rank - step) % world_size
            src_buf = symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype)
            chunks[remote_rank].copy_(src_buf)
@@ -1706,7 +1706,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input(
    with _get_backend_stream():
        # pull + offline reduction
        symm_mem.barrier()
-        for step in range(0, world_size):
+        for step in range(world_size):
            remote_rank = (rank - step) % world_size
            src_buf = symm_mem.get_buffer(
                remote_rank,
@@ -1743,7 +1743,7 @@ def _low_contention_reduce_scatter_with_workspace(
    with _get_backend_stream():
        # push + offline reduction
        workspace.barrier()
-        for step in range(0, world_size):
+        for step in range(world_size):
            remote_rank = (rank - step) % world_size
            dst_buf = workspace.get_buffer(
                remote_rank, chunks[0].shape, chunks[0].dtype, chunks[0].numel() * rank
diff --git a/torch/distributed/elastic/multiprocessing/api.py b/torch/distributed/elastic/multiprocessing/api.py
index d91974548221..9bb580c5bf78 100644
--- a/torch/distributed/elastic/multiprocessing/api.py
+++ b/torch/distributed/elastic/multiprocessing/api.py
@@ -727,7 +727,7 @@ class MultiprocessContext(PContext):
            # pipe. Hence to prevent deadlocks on large return values,
            # we opportunistically try queue.get on each join call
            # See: https://docs.python.org/2/library/multiprocessing.html#all-platforms
-            for local_rank in range(0, self.nprocs):
+            for local_rank in range(self.nprocs):
                return_queue = self._ret_vals[local_rank]
                if not return_queue.empty():
                    # save the return values temporarily into a member var
diff --git a/torch/distributed/elastic/timer/local_timer.py b/torch/distributed/elastic/timer/local_timer.py
index d55cc6ac6e37..5e66ef3fae34 100644
--- a/torch/distributed/elastic/timer/local_timer.py
+++ b/torch/distributed/elastic/timer/local_timer.py
@@ -59,7 +59,7 @@ class MultiprocessingRequestQueue(RequestQueue):
    def get(self, size, timeout: float) -> list[TimerRequest]:
        requests = []
        wait = timeout
-        for _ in range(0, size):
+        for _ in range(size):
            start = time.time()
            try:
diff --git a/torch/distributed/tensor/_dtensor_spec.py b/torch/distributed/tensor/_dtensor_spec.py
index e12f41c4858b..42cb7fcd7c33 100644
--- a/torch/distributed/tensor/_dtensor_spec.py
+++ b/torch/distributed/tensor/_dtensor_spec.py
@@ -107,7 +107,7 @@ class DTensorSpec:
        # follow default left-to-right device order if shard_order is not specified
        tensor_dim_to_mesh_dims: defaultdict[int, list[int]] = defaultdict(list)
        mesh_ndim = len(placements)
-        for mesh_dim in range(0, mesh_ndim):
+        for mesh_dim in range(mesh_ndim):
            # shard_order doesn't work with _StridedShard
            if isinstance(placements[mesh_dim], _StridedShard):
                return ()
diff --git a/torch/distributed/tensor/parallel/fsdp.py b/torch/distributed/tensor/parallel/fsdp.py
index 6cffbdb83d2f..f5367397cc80 100644
--- a/torch/distributed/tensor/parallel/fsdp.py
+++ b/torch/distributed/tensor/parallel/fsdp.py
@@ -306,7 +306,7 @@ def _all_gather_dtensor(
    placements = list(copy.deepcopy(tensor.placements))
    # FSDP + TP: [Shard(0), tp_placement] -> [Replicate(), tp_placement]
    # HSDP + TP: [Replicate(), Shard(0), tp_placement] -> [Replicate(), Replicate(), tp_placement]
-    for i in range(0, len(placements) - 1):
+    for i in range(len(placements) - 1):
        placements[i] = Replicate()
    tensor = tensor.redistribute(
        device_mesh=tensor.device_mesh,
diff --git a/torch/nested/_internal/ops.py b/torch/nested/_internal/ops.py
index f52bfab2a8b3..bdca74c13b1d 100644
--- a/torch/nested/_internal/ops.py
+++ b/torch/nested/_internal/ops.py
@@ -1112,7 +1112,7 @@ def chunk_default(func, *args, **kwargs):
        # the input number; it can be counter-intuitive, but it matches dense behavior.
        return [
            NestedTensor(values=chunk_values[i], **(nested_kwargs[i]))
-            for i in range(0, len(chunk_values))
+            for i in range(len(chunk_values))
        ]
    else:
        return [
diff --git a/torch/onnx/_internal/torchscript_exporter/symbolic_helper.py b/torch/onnx/_internal/torchscript_exporter/symbolic_helper.py
index bcd36a6ac41b..3f92f6418c89 100644
--- a/torch/onnx/_internal/torchscript_exporter/symbolic_helper.py
+++ b/torch/onnx/_internal/torchscript_exporter/symbolic_helper.py
@@ -1005,7 +1005,7 @@ def _interpolate_size_to_scales(g: jit_utils.GraphContext, input, output_size, d
        if i < 2
        else float(output_size[-(dim - i)])
        / float(input.type().sizes()[-(dim - i)])
-        for i in range(0, dim)
+        for i in range(dim)
    ]
    scales = g.op(
        "Constant", value_t=torch.tensor(scales_constant, dtype=torch.float32)
    )
diff --git a/torch/onnx/_internal/torchscript_exporter/symbolic_opset12.py b/torch/onnx/_internal/torchscript_exporter/symbolic_opset12.py
index 822e14556768..d4b887560f9b 100644
--- a/torch/onnx/_internal/torchscript_exporter/symbolic_opset12.py
+++ b/torch/onnx/_internal/torchscript_exporter/symbolic_opset12.py
@@ -331,7 +331,7 @@ def unfold(g: jit_utils.GraphContext, input, dimension, size, step):
    ndim = symbolic_helper._get_tensor_rank(input)
    assert ndim is not None
-    perm = list(range(0, ndim))
+    perm = list(range(ndim))
    perm.append(perm.pop(dimension))

    unsqueeze_list = []
diff --git a/torch/onnx/_internal/torchscript_exporter/symbolic_opset8.py b/torch/onnx/_internal/torchscript_exporter/symbolic_opset8.py
index bde072608088..8ba8e6ee6622 100644
--- a/torch/onnx/_internal/torchscript_exporter/symbolic_opset8.py
+++ b/torch/onnx/_internal/torchscript_exporter/symbolic_opset8.py
@@ -116,7 +116,7 @@ def _interpolate(name, dim, interpolate_mode):
            if i < 2
            else float(output_size[-(dim - i)])
            / float(input.type().sizes()[-(dim - i)])
-            for i in range(0, dim)
+            for i in range(dim)
        ]
        return g.op("Upsample", input, mode_s=interpolate_mode, scales_f=scales)
diff --git a/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py b/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py
index 9b7aba64ef31..16e94b91f89f 100644
--- a/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py
+++ b/torch/onnx/_internal/torchscript_exporter/symbolic_opset9.py
@@ -840,7 +840,7 @@ def t(g: jit_utils.GraphContext, self):
def numpy_T(g: jit_utils.GraphContext, input):
    ndim = symbolic_helper._get_tensor_rank(input)
    assert ndim is not None
-    perm = list(reversed(range(0, ndim)))
+    perm = list(reversed(range(ndim)))
    return g.op("Transpose", input, perm_i=perm)
@@ -990,7 +990,7 @@ def transpose(g: jit_utils.GraphContext, self, dim0, dim1):
@_onnx_symbolic("aten::permute")
@symbolic_helper.parse_args("v", "is")
def permute(g: jit_utils.GraphContext, self, dims):
-    if dims == list(range(0, len(dims))):
+    if dims == list(range(len(dims))):
        return self
    return g.op("Transpose", self, perm_i=dims)
@@ -1368,7 +1368,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
    )
    ceiled_output_dim = [
        math.ceil((dim[i] + 2 * padding[i] - kernel_size[i]) / float(stride[i])) + 1
-        for i in range(0, len(padding))
+        for i in range(len(padding))
    ]
    # ensure last pooling starts inside
    ceiled_output_dim = [
@@ -1377,7 +1377,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
            if (((ceiled_output_dim[i] - 1) * stride[i]) >= (dim[i] + padding[i]))
            else ceiled_output_dim[i]
        )
-        for i in range(0, len(ceiled_output_dim))
+        for i in range(len(ceiled_output_dim))
    ]
    padding_ceil = [
        (
@@ -1392,7 +1392,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
                )
            )
        )
-        for i in range(0, len(padding))
+        for i in range(len(padding))
    ]
    # ensure padding is not > kernel_size
    padding_ceil = [
@@ -1405,7 +1405,7 @@ def get_pool_ceil_padding(input, kernel_size, stride, padding):
            if ((padding_ceil[i] + 2 * padding[i]) >= (kernel_size[i]))
            else int(padding_ceil[i])
        )
-        for i in range(0, len(padding_ceil))
+        for i in range(len(padding_ceil))
    ]
    return padding_ceil
@@ -1697,14 +1697,14 @@ def _adaptive_pool(name, type, tuple_fn, fn=None):
                name, "input size not accessible", input
            )
        # verify if output size % input size = 0 for all dim
-        mod = [dim[i] % output_size[i] for i in range(0, len(dim))]
+        mod = [dim[i] % output_size[i] for i in range(len(dim))]
        if mod != [0] * len(mod):
            if output_size == [1] * len(output_size):
                return g.op("GlobalMaxPool", input), None
            return symbolic_helper._unimplemented(
                name, "output size that are not factor of input size", output_size_value
            )
-        k = [int(dim[i] / output_size[i]) for i in range(0, len(dim))]
+        k = [int(dim[i] / output_size[i]) for i in range(len(dim))]
        # call max_poolxd_with_indices to get indices in the output
        if type == "MaxPool":
            # pyrefly: ignore  # not-callable
@@ -2906,7 +2906,7 @@ def unfold(g: jit_utils.GraphContext, input, dimension, size, step):
        for low, hi in zip(low_indices, hi_indices)
    ]
    ndim = len(sizes)
-    perm = list(range(0, ndim))
+    perm = list(range(ndim))
    perm.append(perm.pop(dimension))
    unsqueeze = [
        symbolic_helper._unsqueeze_helper(
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index 82e630519eb8..0cecc762bce4 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -11615,7 +11615,7 @@ def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=Fal
    # numpy searchsorted only supports 1D inputs so we split up ND inputs
    orig_shape = boundary.shape
    num_splits = np.prod(sorted_sequence.shape[:-1])
-    splits = range(0, num_splits)
+    splits = range(num_splits)
    sorted_sequence, boundary = sorted_sequence.reshape(num_splits, -1), boundary.reshape(num_splits, -1)
    if sorter is not None:
        sorter = sorter.reshape(num_splits, -1)
@@ -16258,7 +16258,7 @@ op_db: list[OpInfo] = [
           aten_backward_name='_prelu_kernel_backward',
           ref=lambda x, weight:
               np.maximum(0., x) + np.minimum(0., x) *
-               (weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(0, x.ndim)])),
+               (weight if x.ndim == 1 else weight.reshape([weight.size if i == 1 else 1 for i in range(x.ndim)])),
           dtypes=floating_types_and(torch.bfloat16, torch.float16),
           supports_forward_ad=True,
           supports_fwgrad_bwgrad=True,
diff --git a/torch/testing/_internal/common_nn.py b/torch/testing/_internal/common_nn.py
index 68a35e8c40a1..3153359326dc 100644
--- a/torch/testing/_internal/common_nn.py
+++ b/torch/testing/_internal/common_nn.py
@@ -2896,7 +2896,7 @@ def _multilabelmarginloss_reference(input, target):
    sum = 0
    for target_index in targets:
-        for i in range(0, len(input)):
+        for i in range(len(input)):
            if i not in targets:
                sum += max(0, 1 - input[target_index] + input[i])

@@ -2914,7 +2914,7 @@ def multilabelmarginloss_reference(input, target, reduction='mean'):
    n = input.size(0)
    dim = input.size(1)
    output = input.new(n).zero_()
-    for i in range(0, n):
+    for i in range(n):
        output[i] = _multilabelmarginloss_reference(input[i], target[i])

    if reduction == 'mean':
@@ -2955,7 +2955,7 @@ def _multimarginloss_reference(input, target_idx, p, margin, weight):
        weight = input.new(len(input)).fill_(1)

    output = 0
-    for i in range(0, len(input)):
+    for i in range(len(input)):
        if i != target_idx:
            output += weight[target_idx] * (max(0, (margin - input[target_idx] + input[i])) ** p)
    return output
@@ -2972,7 +2972,7 @@ def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reducti
    n = input.size(0)
    dim = input.size(1)
    output = input.new(n)
-    for x in range(0, n):
+    for x in range(n):
        output[x] = _multimarginloss_reference(input[x], target[x], p, margin, weight)

    if reduction == 'mean':
@@ -2987,7 +2987,7 @@ def multimarginloss_reference(input, target, p=1, margin=1, weight=None, reducti
def cosineembeddingloss_reference(input1, input2, target, margin=0, reduction='mean'):
    def _cos(a, b):
        cos = a.new(a.size(0))
-        for i in range(0, a.size(0)):
+        for i in range(a.size(0)):
            cos[i] = (a[i] * b[i]).sum() / ((((a[i] * a[i]).sum() + 1e-12) * ((b[i] * b[i]).sum() + 1e-12)) ** 0.5)
        return cos

diff --git a/torch/testing/_internal/distributed/_tensor/common_dtensor.py b/torch/testing/_internal/distributed/_tensor/common_dtensor.py
index a9beb0e60865..22d6d8e7dede 100644
--- a/torch/testing/_internal/distributed/_tensor/common_dtensor.py
+++ b/torch/testing/_internal/distributed/_tensor/common_dtensor.py
@@ -705,7 +705,7 @@ class LocalDTensorTestBase(DTensorTestBase):
            self.skipTest(msg)

    def _get_local_tensor_mode(self):
-        return LocalTensorMode(frozenset(range(0, self.world_size)))
+        return LocalTensorMode(frozenset(range(self.world_size)))

    def setUp(self) -> None:
        super().setUp()
diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py
index c41602d43994..499341b07951 100644
--- a/torch/testing/_internal/distributed/distributed_test.py
+++ b/torch/testing/_internal/distributed/distributed_test.py
@@ -658,13 +658,13 @@ class DistributedTest:
            return (group, group_id, rank)

        def _init_full_group_test(self, **kwargs):
-            group = list(range(0, dist.get_world_size()))
+            group = list(range(dist.get_world_size()))
            group_id = dist.new_group(**kwargs)
            rank = dist.get_rank()
            return (group, group_id, rank)

        def _init_global_test(self):
-            group = list(range(0, dist.get_world_size()))
+            group = list(range(dist.get_world_size()))
            group_id = dist.group.WORLD
            rank = dist.get_rank()
            return (group, group_id, rank)
@@ -1114,7 +1114,7 @@ class DistributedTest:
            averager = averagers.PeriodicModelAverager(
                period=period, warmup_steps=warmup_steps
            )
-            for step in range(0, 20):
+            for step in range(20):
                # Reset the parameters at every step.
                param.data = copy.deepcopy(tensor)
                for params in model.parameters():
@@ -1143,7 +1143,7 @@ class DistributedTest:
            averager = averagers.PeriodicModelAverager(
                period=period, warmup_steps=warmup_steps
            )
-            for step in range(0, 20):
+            for step in range(20):
                # Reset the parameters at every step.
                for param_group in opt.param_groups:
                    for params in param_group["params"]:
@@ -1203,7 +1203,7 @@ class DistributedTest:
            averager = averagers.PeriodicModelAverager(
                period=period, warmup_steps=warmup_steps
            )
-            for step in range(0, 20):
+            for step in range(20):
                # Reset the parameters at every step.
                param.data = copy.deepcopy(tensor)
                for params in model.parameters():
@@ -1284,7 +1284,7 @@ class DistributedTest:
            expected_global_avg_tensor = (
                torch.ones_like(param.data) * sum(range(world_size)) / world_size
            )
-            for step in range(0, 25):
+            for step in range(25):
                # Reset the parameters at every step.
                param.data = copy.deepcopy(tensor)
                for params in model.parameters():
@@ -1390,7 +1390,7 @@ class DistributedTest:

            for val in ["1", "0"]:
                os.environ["TORCH_NCCL_BLOCKING_WAIT"] = val
-                for src in range(0, world_size):
+                for src in range(world_size):
                    send_tensor = _build_tensor(rank + 1, device_id=device_id).fill_(
                        src
                    )
@@ -1409,7 +1409,7 @@ class DistributedTest:
            for req in reqs:
                req.wait()

-            for src in range(0, world_size):
+            for src in range(world_size):
                self.assertEqual(recv_tensors[src], expected_tensors[src])

            self._barrier()
@@ -1505,7 +1505,7 @@ class DistributedTest:
            rank = dist.get_rank()
            p2p_op_list = []

-            for src in range(0, dist.get_world_size()):
+            for src in range(dist.get_world_size()):
                if src == rank:
                    continue
                send_tensor = _build_tensor(rank + 1)
@@ -1528,7 +1528,7 @@ class DistributedTest:
            rank = dist.get_rank()
            p2p_op_list = []

-            for src in range(0, dist.get_world_size()):
+            for src in range(dist.get_world_size()):
                if src == rank:
                    continue
                send_tensor = _build_tensor(rank + 1)
@@ -1602,10 +1602,10 @@ class DistributedTest:
            tensor = _build_tensor(rank + 1, device_id=device_id)
            profiler_cls = profiler_ctx if profiler_ctx is not None else nullcontext()
            with profiler_cls as prof:
-                for src in range(0, world_size):
+                for src in range(world_size):
                    if src == rank:
                        # Send mode
-                        for dst in range(0, world_size):
+                        for dst in range(world_size):
                            if dst == rank:
                                continue
                            dist.send(tensor, dst)
@@ -1674,10 +1674,10 @@ class DistributedTest:
            tensor = _build_tensor(send_size)
            ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
            with ctx as prof:
-                for src in range(0, dist.get_world_size()):
+                for src in range(dist.get_world_size()):
                    if src == rank:
                        # Send mode
-                        for dst in range(0, dist.get_world_size()):
+                        for dst in range(dist.get_world_size()):
                            if dst == rank:
                                continue
                            dist.send(tensor, dst)
@@ -1742,10 +1742,10 @@ class DistributedTest:
            ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
            with ctx as prof:
-                for dst in range(0, dist.get_world_size()):
+                for dst in range(dist.get_world_size()):
                    if dst == rank:
                        # Recv mode
-                        for dst in range(0, dist.get_world_size()):
+                        for dst in range(dist.get_world_size()):
                            if dst == rank:
                                continue
@@ -1846,10 +1846,10 @@ class DistributedTest:
            tensor = _build_tensor(send_recv_size, value=rank)
            ctx = profiler_ctx if profiler_ctx is not None else nullcontext()
            with ctx as prof:
-                for dst in range(0, world_size):
+                for dst in range(world_size):
                    if dst == rank:
                        # Recv mode
-                        for src in range(0, world_size):
+                        for src in range(world_size):
                            if src == rank:
                                continue
                            output_tensor = _build_tensor(send_recv_size, value=-1)
@@ -7480,7 +7480,7 @@ class DistributedTest:
            for baseline_iter in baseline_num_iters:
                for offset in iteration_offsets:
                    mapping = dict.fromkeys(
-                        range(0, num_early_join_ranks), baseline_iter
+                        range(num_early_join_ranks), baseline_iter
                    )
                    # if num_early_join_ranks > 1, ranks > 0 that will join early
                    # iterate offset//2 more times than rank 0, to test nodes
diff --git a/torch/testing/_internal/distributed/multi_threaded_pg.py b/torch/testing/_internal/distributed/multi_threaded_pg.py
index 2cc22cb7c23a..79aff05b3421 100644
--- a/torch/testing/_internal/distributed/multi_threaded_pg.py
+++ b/torch/testing/_internal/distributed/multi_threaded_pg.py
@@ -166,7 +166,7 @@ class AllReduce:
            # collect all data to the list and make them
            # all on rank 0 device
            tensors = [
-                data[src_rank][i].to(rank_0_device) for src_rank in range(0, len(data))
+                data[src_rank][i].to(rank_0_device) for src_rank in range(len(data))
            ]

            # now mimic reduce across all ranks
diff --git a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py
index 1d6c7500c5ad..3c5c9101e43c 100644
--- a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py
+++ b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py
@@ -266,7 +266,7 @@ class CommonDistAutogradTest(RpcAgentTestFixture):
        grads = dist_autograd.get_gradients(context_id)
        nargs = len(args)
        ngrads = 0
-        for i in range(0, nargs):
+        for i in range(nargs):
            if local_grads[i] is not None:
                self.assertIn(args[i], grads)
                self.assertEqual(local_grads[i], grads[args[i]])
@@ -1973,7 +1973,7 @@ class DistAutogradTest(CommonDistAutogradTest):
        DistAutogradTest._test_clean_context_backward_context_id = context_id

        # Send the context id to all nodes.
-        for i in range(0, self.world_size):
+        for i in range(self.world_size):
            if i != self.rank:
                rank_distance = (i - self.rank + self.world_size) % self.world_size
                rpc.rpc_sync(
@@ -1988,7 +1988,7 @@ class DistAutogradTest(CommonDistAutogradTest):
        self.assertEqual(self.world_size - 1, len(known_context_ids))

        t1 = torch.rand((3, 3), requires_grad=True)
-        for i in range(0, 100):
+        for i in range(100):
            dst = self._next_rank()
            t1 = rpc.rpc_sync(worker_name(dst), torch.add, args=(t1, t1))

diff --git a/torch/testing/_internal/distributed/rpc/rpc_test.py b/torch/testing/_internal/distributed/rpc/rpc_test.py
index 4ec964092b39..03469e473921 100644
--- a/torch/testing/_internal/distributed/rpc/rpc_test.py
+++ b/torch/testing/_internal/distributed/rpc/rpc_test.py
@@ -1818,7 +1818,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
        # Spawn multiple threads that send RPCs to ensure keys are correctly
        # prefixed when there are multiple RPCs being created/in flight at the
        # same time.
-        dst_ranks = [rank for rank in range(0, self.world_size) if rank != self.rank]
+        dst_ranks = [rank for rank in range(self.world_size) if rank != self.rank]

        def rpc_with_profiling(dst_worker):
            with _profile() as prof:
@@ -1884,7 +1884,7 @@ class RpcTest(RpcAgentTestFixture, RpcTestCommon):
        if self.rank != 1:
            return

-        dst_ranks = [rank for rank in range(0, self.world_size) if rank != self.rank]
+        dst_ranks = [rank for rank in range(self.world_size) if rank != self.rank]
        for dst in dst_ranks:
            dst_worker = worker_name(dst)
            with _profile() as prof:
diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py
index e98d0e482683..ce8e68ae1e2c 100644
--- a/torch/testing/_internal/jit_utils.py
+++ b/torch/testing/_internal/jit_utils.py
@@ -439,7 +439,7 @@ class JitTestCase(JitCommonTestCase):
            state = model.get_debug_state()
            plan = get_execution_plan(state)
            num_bailouts = plan.code.num_bailouts()
-            for i in range(0, num_bailouts):
+            for i in range(num_bailouts):
                plan.code.request_bailout(i)
                bailout_outputs = model(*inputs)
                self.assertEqual(bailout_outputs, expected)
diff --git a/torch/testing/_internal/triton_utils.py b/torch/testing/_internal/triton_utils.py
index 4edaf86dd1d7..0964c68ebb20 100644
--- a/torch/testing/_internal/triton_utils.py
+++ b/torch/testing/_internal/triton_utils.py
@@ -912,7 +912,7 @@ if has_triton():
            b_ptrs = b_ptr + (offs_k[:, None] + offs_bn[None, :])

            accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
-            for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
+            for k in range(tl.cdiv(K, BLOCK_SIZE_K)):
                a = tl.load(a_ptrs, mask=offs_k[None, :] < K - k * BLOCK_SIZE_K, other=0.0)
                b = tl.load(b_ptrs, mask=offs_k[:, None] < K - k * BLOCK_SIZE_K, other=0.0)
                accumulator = tl.dot(a, b, accumulator)