Fix G001,G002,G003 in logs to % syntax (#97812)
Signed-off-by: Edward Z. Yang <ezyang@meta.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/97812
Approved by: https://github.com/Skylion007, https://github.com/kiukchung, https://github.com/malfet, https://github.com/mlazos

Committed by: PyTorch MergeBot
Parent: 7f9533e224
Commit: 5df59f957f

.flake8
@@ -16,7 +16,7 @@ ignore =
     # these ignores are from flake8-comprehensions; please fix!
     C407
     # these ignores are from flake8-logging-format; please fix!
-    G001,G002,G003,G004,G100,G101,G200,G201,G202,
+    G004,G100,G101,G200,G201,G202
     # these ignores are from flake8-simplify. please fix or ignore with commented reason
     SIM105,SIM108,SIM109,SIM110,SIM111,SIM113,SIM114,SIM115,SIM116,SIM117,SIM118,SIM119,SIM12,
     # flake8-simplify code styles
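For context, the three codes removed from the ignore list above are flake8-logging-format checks against eagerly formatted log messages: G001 flags str.format() inside a logging call, G002 flags the % operator, and G003 flags string concatenation. The hunks below convert those call sites so the arguments are passed to the logging call and only interpolated if the record is actually emitted. A minimal sketch of the before/after pattern, with illustrative values:

    import logging

    logger = logging.getLogger(__name__)
    model_name = "resnet50"  # illustrative value

    # Flagged forms: the message is built even when the log level filters it out.
    logger.warning("Could not find batch size for {}".format(model_name))            # G001
    logger.error("tensor device is %s and expected device is %s" % ("cpu", "cuda"))  # G002
    logger.info("Etcd machines: " + str(["host1", "host2"]))                         # G003

    # Converted forms: formatting is deferred to the logging framework.
    logger.warning("Could not find batch size for %s", model_name)
    logger.error("tensor device is %s and expected device is %s", "cpu", "cuda")
    logger.info("Etcd machines: %s", ["host1", "host2"])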
@@ -891,7 +891,7 @@ def read_batch_size_from_file(args, filename, model_name):
             if model_name == cur_name:
                 batch_size = int(b)
     if batch_size is None:
-        log.warning("Could not find batch size for {}".format(model_name))
+        log.warning("Could not find batch size for %s", model_name)
     elif batch_size == -1:
         raise RuntimeError(
             f"Batch size is unset for {model_name} in {args.batch_size_file}"
@@ -31,7 +31,7 @@ if __name__ == "__main__":
         work = process_group.allreduce(torch.rand(10).cuda(rank))
         logging.info('Waiting for allreduce to complete...')
         work.wait()
-        logging.info('Second allreduce successful: {}'.format(work.is_success()))
+        logging.info('Second allreduce successful: %s', work.is_success())
     else:
         logging.info('Aborting all other ranks.')
         os.abort()
@@ -45,9 +45,11 @@ def pretty_print_buckets(buckets: List[Bucket]):
     try:
         from tabulate import tabulate

+        # TODO: Do you really want to log.info this? It would get
+        # suppressed if log level is too low
         log.info(
-            "\nDDPOptimizer bucket assignments\n"
-            + tabulate(rows, headers=headers, tablefmt="simple_grid")
+            "\nDDPOptimizer bucket assignments\n%s",
+            tabulate(rows, headers=headers, tablefmt="simple_grid"),
         )
     except ImportError:
         log.info(
@@ -318,9 +320,7 @@ class DDPOptimizer:
             else:
                 curr_submod = real_mod

-            log.debug(
-                f"\n---{n.target} graph---\n" + str(curr_submod.graph)
-            )
+            log.debug(f"\n---{n.target} graph---\n{curr_submod.graph}")

             # When calling the compiler on the submod, inputs (new_args) are expected to
             # be FakeTensors already since Dynamo would have made them FakeTensors in the
@@ -348,5 +348,5 @@ class DDPOptimizer:
         submod_compiler.run(*example_inputs)
         split_gm.recompile()

-        log.debug("\n---final graph---\n" + str(split_gm.graph) + "\n---------------\n")
+        log.debug(f"\n---final graph---\n{split_gm.graph}\n---------------\n")
         return split_gm
@@ -262,9 +262,9 @@ def convert_frame_assert(
             assert code in guard_failures, "TODO(whc) any other recompile reasons?"
             log.warning(
                 f"torch._dynamo hit config.cache_size_limit ({config.cache_size_limit})\n"
-                + f" function: {format_func_info(code)}\n"
-                + f" reasons: {format_guard_failures(code)}\n"
-                + f"to diagnose recompilation issues, see {troubleshooting_url}."
+                f" function: {format_func_info(code)}\n"
+                f" reasons: {format_guard_failures(code)}\n"
+                f"to diagnose recompilation issues, see {troubleshooting_url}."
             )
             unimplemented("cache_size_limit reached")

@@ -307,7 +307,7 @@ or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib6
             find_tc = self.add_lib_preload(lib_type="tcmalloc")
             if not find_tc:
                 msg = f"{self.msg_lib_notfound} you can use \"conda install -c conda-forge gperftools\" to install {{0}}"
-                logger.warning(msg.format("TCmalloc", "tcmalloc"))
+                logger.warning(msg.format("TCmalloc", "tcmalloc"))  # noqa: G001
             else:
                 logger.info("Use TCMalloc memory allocator")

@@ -315,7 +315,7 @@ or /.local/lib/ or /usr/local/lib/ or /usr/local/lib64/ or /usr/lib or /usr/lib6
             find_je = self.add_lib_preload(lib_type="jemalloc")
             if not find_je:
                 msg = f"{self.msg_lib_notfound} you can use \"conda install -c conda-forge jemalloc\" to install {{0}}"
-                logger.warning(msg.format("Jemalloc", "jemalloc"))
+                logger.warning(msg.format("Jemalloc", "jemalloc"))  # noqa: G001
             else:
                 logger.info("Use JeMalloc memory allocator")
                 self.set_env("MALLOC_CONF", "oversize_threshold:1,background_thread:true,metadata_thp:auto")
@@ -371,7 +371,7 @@ Value applied: {os.environ[env_name]}. Value ignored: {env_value}")
             find_iomp = self.add_lib_preload(lib_type="iomp5")
             if not find_iomp:
                 msg = f"{self.msg_lib_notfound} you can use \"conda install mkl\" to install {{0}}"
-                logger.warning(msg.format("iomp", "iomp5"))
+                logger.warning(msg.format("iomp", "iomp5"))  # noqa: G001
             else:
                 logger.info("Using Intel OpenMP")
                 if set_kmp_affinity:
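In the three hunks above the call sites keep str.format() and are annotated with # noqa: G001 instead, presumably because the message template is itself an f-string that leaves a literal {0} slot to be filled right before logging, so a %-style rewrite would not be a drop-in change. A small sketch of that suppression, using an illustrative stand-in for self.msg_lib_notfound:

    import logging

    logger = logging.getLogger(__name__)
    msg_lib_notfound = "tcmalloc library not found in the system"  # illustrative stand-in

    # The f-string interpolates immediately; {{0}} survives as a literal {0}
    # placeholder that .format() fills just before logging. The inline noqa
    # silences the G001 check for this one line only.
    msg = f"{msg_lib_notfound} you can use \"conda install -c conda-forge gperftools\" to install {{0}}"
    logger.warning(msg.format("TCmalloc"))  # noqa: G001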
@@ -429,9 +429,9 @@ please make sure ninstances <= total_cores)")
             num_leftover_cores = ncore_per_node % args.ncores_per_instance
             if args.ncores_per_instance > ncore_per_node:
                 # too many ncores_per_instance to skip cross-node cores
-                logger.warning("there are {} core(s) per socket, but you specify {} ncores_per_instance and \
+                logger.warning("there are %s core(s) per socket, but you specify %s ncores_per_instance and \
 skip_cross_node_cores. Please make sure --ncores-per-instance < core(s) per \
-socket".format(ncore_per_node, args.ncores_per_instance))
+socket", ncore_per_node, args.ncores_per_instance)
                 exit(-1)
             elif num_leftover_cores == 0:
                 # aren't any cross-node cores
@@ -36,7 +36,7 @@ class PostLocalSGDState:
         post_local_gradient_allreduce=True,
     ):
         logger.info(
-            "Local SGD will be started after {} iterations".format(start_localSGD_iter)
+            "Local SGD will be started after %s iterations", start_localSGD_iter
         )

         # The group used for all-reducing gradients globally.
@@ -58,7 +58,7 @@ class PostLocalSGDState:

         if self.iter == self.start_localSGD_iter:
             logger.info(
-                "Start to apply local SGD after {} iterations.".format(self.iter)
+                "Start to apply local SGD after %s iterations.", self.iter
             )


@@ -106,8 +106,8 @@ def _report_compression_stats(bucket, state):
     ):
         stats = state.compression_stats()
         logger.info(
-            "Compression stats: iter {}, total before compression {}, total after compression {}, "
-            "rate {}".format(state.iter, stats[1], stats[2], stats[0])
+            "Compression stats: iter %s, total before compression %s, total after compression %s, "
+            "rate %s", state.iter, stats[1], stats[2], stats[0]
         )
         state.next_stats_report = state.iter + state.compression_stats_logging_frequency

@@ -183,19 +183,18 @@ class PowerSGDState:
         batch_tensors_with_same_shape: bool = False,
     ):
         logger.info(
-            "PowerSGD config: matrix_approximation_rank = {}; start_powerSGD_iter = {}; "
-            "min_compression_rate = {}; orthogonalization_epsilon = {}; use_error_feedback = {}; warm_start = {}; "
-            "random_seed = {}; compression_stats_logging_frequency = {}; batch_tensors_with_same_shape = {}".format(
+            "PowerSGD config: matrix_approximation_rank = %s; start_powerSGD_iter = %s; "
+            "min_compression_rate = %s; orthogonalization_epsilon = %s; use_error_feedback = %s; warm_start = %s; "
+            "random_seed = %s; compression_stats_logging_frequency = %s; batch_tensors_with_same_shape = %s",
             matrix_approximation_rank,
             start_powerSGD_iter,
             min_compression_rate,
             orthogonalization_epsilon,
             use_error_feedback,
             warm_start,
             random_seed,
             compression_stats_logging_frequency,
             batch_tensors_with_same_shape,
-            )
         )

         self.process_group = process_group
@@ -300,7 +299,7 @@ class PowerSGDState:

         if self.iter == self.start_powerSGD_iter:
             logger.info(
-                "Start to apply PowerSGD after {} iterations.".format(self.iter)
+                "Start to apply PowerSGD after %s iterations.", self.iter
             )

     def compression_stats(self):
@@ -409,9 +408,8 @@ def powerSGD_hook(
             input_tensor.add_(state.error_dict[bucket_index])
         else:
             logger.info(
-                "A zero tensor of length {} that represents local error is created.".format(
+                "A zero tensor of length %s that represents local error is created.",
                     total_length
-                )
             )
             state.error_dict[bucket_index] = torch.zeros(
                 total_length, device=device, dtype=dtype
@@ -468,9 +466,8 @@ def powerSGD_hook(
         # Only log this if warm-start to avoid spamming.
         if state.warm_start:
             logger.info(
-                "Allocating contiguous memory of length {} for Ps, and of length {} for Qs, respectively.".format(
+                "Allocating contiguous memory of length %s for Ps, and of length %s for Qs, respectively.",
                     total_Ps_size, total_Qs_size
-                )
             )
             state.p_memory_dict[bucket_index] = torch.empty(
                 total_Ps_size, device=device, dtype=dtype
@@ -728,9 +725,8 @@ def batched_powerSGD_hook(
             input_tensor.add_(state.error_dict[bucket_index])
         else:
             logger.info(
-                "A zero tensor of length {} that represents local error is created.".format(
+                "A zero tensor of length %s that represents local error is created.",
                     padded_total_length
-                )
             )
             state.error_dict[bucket_index] = torch.zeros(
                 padded_total_length, device=device, dtype=input_tensor.dtype
@@ -749,9 +745,8 @@ def batched_powerSGD_hook(
         # Only log this if warm-start to avoid spamming.
         if state.warm_start:
             logger.info(
-                "Initializing low-rank tensors P and Q, each of which has a shape of {} x {}.".format(
+                "Initializing low-rank tensors P and Q, each of which has a shape of %s x %s.",
                     square_side_length, state.matrix_approximation_rank
-                )
             )

     def create_low_rank_tensor(fill_random_values, rng):
@@ -476,7 +476,7 @@ def _store_based_barrier(rank, store, timeout):
     """
     store_key = "{}:{}".format(STORE_BASED_BARRIER_PREFIX, _world.group_count)
     store.add(store_key, 1)
-    logger.info("Added key: {} to store for rank: {}".format(store_key, rank))
+    logger.info("Added key: %s to store for rank: %s", store_key, rank)

     # Now wait for all workers to check in with the store.
    world_size = get_world_size()
@@ -496,9 +496,8 @@ def _store_based_barrier(rank, store, timeout):
         if timedelta(seconds=(time.time() - log_time)) > timedelta(seconds=10):
             logger.info(
                 "Waiting in store based barrier to initialize process group for "
-                "rank: {}, key: {} (world_size={}, worker_count={}, timeout={})".format(
+                "rank: %s, key: %s (world_size=%s, worker_count=%s, timeout=%s)",
                     rank, store_key, world_size, worker_count, timeout
-                )
             )
             log_time = time.time()

@@ -3716,7 +3715,8 @@ def new_subgroups(
         if rank in ranks_in_subgroup:
             cur_subgroup = subgroup
             logger.info(
-                "Rank {} is assigned to subgroup {}".format(rank, ranks_in_subgroup)
+                "Rank %s is assigned to subgroup %s",
+                rank, ranks_in_subgroup
             )

     return cur_subgroup, subgroups
@@ -3828,7 +3828,7 @@ def new_subgroups_by_enumeration(
         rank_to_ranks_dict[rank] = ranks
         if my_rank == rank:
             cur_subgroup = subgroup
-            logger.info("Rank {} is assigned to subgroup {}".format(rank, ranks))
+            logger.info("Rank %s is assigned to subgroup %s", rank, ranks)

     return cur_subgroup, subgroups

@@ -211,7 +211,7 @@ class EtcdRendezvous:
         last_call_timeout,
     ):
         self.client = client
-        log.info("Etcd machines: " + str(self.client.machines))
+        log.info("Etcd machines: %s", self.client.machines)

         self._prefix = prefix
         self._run_id = run_id
@@ -310,7 +310,7 @@ class EtcdRendezvous:
                 # to avoid spamming etcd
                 # FIXME: there are a few things that fall under this like
                 # etcd.EtcdKeyNotFound, etc, which could be handled more explicitly.
-                log.info("Rendezvous attempt failed, will retry. Reason: " + str(e))
+                log.info("Rendezvous attempt failed, will retry. Reason: %s", e)
                 time.sleep(1)

     def init_phase(self):
@@ -335,12 +335,12 @@ class EtcdRendezvous:
         try:
             active_version = self.try_create_rendezvous()
             state = json.loads(active_version.value)
-            log.info("New rendezvous state created: " + str(state))
+            log.info("New rendezvous state created: %s", state)
         except etcd.EtcdAlreadyExist:
             active_version, state = self.get_rdzv_state()
             # Note: it is possible for above query to fail (etcd.EtcdKeyNotFound),
             # but this is ok for us - just means we'll restart from beginning.
-            log.info("Observed existing rendezvous state: " + str(state))
+            log.info("Observed existing rendezvous state: %s", state)

         if state["status"] == "closed":
             raise RendezvousClosedError()
@@ -365,9 +365,8 @@ class EtcdRendezvous:
         active_version, this_rank = self.join_rendezvous(expected_version)
         state = json.loads(active_version.value)
         log.info(
-            "Joined rendezvous version {} as rank {}. Full state: {}".format(
+            "Joined rendezvous version %s as rank %s. Full state: %s",
                 state["version"], this_rank, state
-            )
         )

         # If this worker was first to reach num_min_workers requirement,
@@ -380,10 +379,10 @@ class EtcdRendezvous:
         # when min_num_workers is reached.

         if this_rank == self._num_min_workers - 1 and state["status"] == "joinable":
-            log.info("Rank {} is responsible for join last call.".format(this_rank))
+            log.info("Rank %s is responsible for join last call.", this_rank)
             last_call_deadline = time.time() + self._last_call_timeout
             self.handle_join_last_call(expected_version, last_call_deadline)
-            log.info("Rank {} finished join last call.".format(this_rank))
+            log.info("Rank %s finished join last call.", this_rank)

         # Wait for rendezvous state to be frozen, which means a fixed set of peers
         log.info("Waiting for remaining peers.")
@@ -412,9 +411,8 @@ class EtcdRendezvous:
         state = json.loads(active_version.value)

         log.info(
-            "Rendezvous version {} is complete. Final state: {}".format(
+            "Rendezvous version %s is complete. Final state: %s",
                 state["version"], state
-            )
         )

         # Rendezvous version number; our rank in it; world size
@@ -433,9 +431,8 @@ class EtcdRendezvous:
         # 2. if keep alives are missing, destroy it and bail out.
         active_state = self.announce_self_waiting(expected_version)
         log.info(
-            "Added self to waiting list. Rendezvous full state: {}".format(
+            "Added self to waiting list. Rendezvous full state: %s",
                 active_state.value
-            )
         )

         self.wait_for_rendezvous_to_free(expected_version)
@@ -698,9 +695,10 @@ class EtcdRendezvous:
             if key not in keep_alive_keys:
                 # This participant didn't renew their lease. We'll declare this
                 # rendezvous version as dead (but only if it hadn't changed)
-                log.info("Keep-alive key {} is not renewed.".format(key))
+                log.info("Keep-alive key %s is not renewed.", key)
                 log.info(
-                    "Rendevous version {} is incomplete. ".format(expected_version)
+                    "Rendevous version %s is incomplete. ",
+                    expected_version
                 )
                 log.info("Attempting to destroy it.")

@@ -713,9 +711,8 @@ class EtcdRendezvous:
                 )

                 log.info(
-                    "Destroyed rendezvous version {} successfully.".format(
+                    "Destroyed rendezvous version %s successfully.",
                         expected_version
-                    )
                 )

                 # We can return (and retry) immediately
@@ -161,8 +161,9 @@ class ShardedGradScaler(GradScaler):
             for tensor in grad:
                 if tensor.device != expected_device:
                     log.error(
-                        "tensor device is %s and expected device is %s"
-                        % (tensor.device, expected_device)
+                        "tensor device is %s and expected device is %s",
+                        tensor.device,
+                        expected_device,
                     )
                     raise ValueError("Gradients must be on the same device.")

@@ -73,10 +73,10 @@ def _write(out_path, text):
         old_text = None
     if old_text != text:
         with open(out_path, "w") as f:
-            logger.info("Writing {}".format(out_path))
+            logger.info("Writing %s", out_path)
             f.write(text)
     else:
-        logger.info("Skipped writing {}".format(out_path))
+        logger.info("Skipped writing %s", out_path)


 def _do_instantiate_remote_module_template(