[BE] replace incorrect .. note:: invocations (#142868)

Something I've noticed is that a lot of the distributed sites don't render on our docs at all, but if they ever do, the notes will render properly now 😛
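For reference, a minimal sketch of the pattern this commit enforces (the function name here is made up): Sphinx only renders a note admonition when the directive is spelled ".. note::" with two leading dots, a space, and two trailing colons, and with the body indented beneath it. Spellings like "..note:" or "..Note::" fall through as plain text.

    def frobnicate(x):
        """Do something with ``x``.

        .. note::
            This body is indented under the directive, so Sphinx renders it
            as a note box; ``..note:`` or ``..Note::`` would not render.
        """
        return x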

Pull Request resolved: https://github.com/pytorch/pytorch/pull/142868
Approved by: https://github.com/albanD
Author: Jane Xu
Date: 2024-12-11 07:33:50 -08:00
Committed by: PyTorch MergeBot
Parent: 0b96413dbf
Commit: fd65bd755d
10 changed files with 22 additions and 17 deletions

View File

@@ -10,7 +10,7 @@ def get_fastpath_enabled() -> bool:
"""Returns whether fast path for TransformerEncoder and MultiHeadAttention
is enabled, or ``True`` if jit is scripting.
-..note:
+.. note::
The fastpath might not be run even if ``get_fastpath_enabled`` returns
``True`` unless all conditions on inputs are met.
"""

View File

@@ -601,8 +601,9 @@ def stream(stream: Optional["torch.cuda.Stream"]) -> StreamContext:
Arguments:
stream (Stream): selected stream. This manager is a no-op if it's
``None``.
-..Note:: In eager mode stream is of type Stream class while in JIT it is
-an object of the custom class ``torch.classes.cuda.Stream``.
+.. note::
+In eager mode stream is of type Stream class while in JIT it is
+an object of the custom class ``torch.classes.cuda.Stream``.
"""
return StreamContext(stream)
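A brief usage sketch of the context manager this docstring documents; the synchronization call at the end is one common way to hand results back to the default stream:

    import torch

    if torch.cuda.is_available():
        s = torch.cuda.Stream()
        # Kernels launched inside the context are enqueued on the side stream `s`.
        with torch.cuda.stream(s):
            y = torch.randn(1024, device="cuda").sum()
        # Make the default stream wait for `s` before consuming `y`.
        torch.cuda.current_stream().wait_stream(s)
        print(y.item())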

View File

@@ -510,10 +510,11 @@ class PContext(abc.ABC):
A timeout value of zero simply queries the status of the processes (e.g. equivalent
to a poll).
-..note: Multiprocessing library registers SIGTERM and SIGINT signal handlers that raise
-``SignalException`` when the signals received. It is up to the consumer of the code
-to properly handle the exception. It is important not to swallow the exception otherwise
-the process would not terminate. Example of the typical workflow can be:
+.. note::
+Multiprocessing library registers SIGTERM and SIGINT signal handlers that raise
+``SignalException`` when the signals received. It is up to the consumer of the code
+to properly handle the exception. It is important not to swallow the exception otherwise
+the process would not terminate. Example of the typical workflow can be:
.. code-block:: python
pc = start_processes(...)
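The docstring's example is truncated here. As a hedged sketch of the workflow the note describes (arguments to start_processes are elided, as in the original), the key point is that the SignalException must be allowed to propagate after cleanup:

    from torch.distributed.elastic.multiprocessing import start_processes

    pc = start_processes(...)   # returns a PContext; arguments elided as in the docstring
    try:
        # wait() raises SignalException if the process received SIGTERM/SIGINT
        pc.wait(timeout=300)
    finally:
        pc.close()              # terminate any workers still running, then let
                                # the exception (if any) propagate to the caller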

View File

@@ -142,7 +142,7 @@ def get_free_port():
>>> get_free_port()
63976
-..note:
+.. note::
The port returned by :func:`get_free_port` is not reserved and may be
taken by another process after this function returns.
"""

View File

@@ -246,7 +246,8 @@ class StateDictType(Enum):
This enum indicates that which type of ``state_dict`` the FSDP module is
currently processing (returning or loading).
The default value is FULL_STATE_DICT to comply the PyTorch convention.
-..note::
+.. note::
FSDP currently supports three types of ``state_dict``:
1. ``state_dict/load_state_dict`: this pair of APIs return and load
the non-sharded, unflattened parameters. The semantics is the
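A hedged sketch of selecting one of those state_dict types, assuming model is already wrapped in FullyShardedDataParallel and a process group is initialized:

    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, StateDictType

    # Inside the context, model.state_dict() returns the chosen flavor;
    # FULL_STATE_DICT yields the non-sharded, unflattened parameters.
    with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT):
        full_sd = model.state_dict()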

View File

@@ -64,7 +64,8 @@ class LaunchConfig:
local_addr: address of the local node if any. If not set, a lookup on the local
machine's FQDN will be performed.
local_ranks_filter: ranks for which to show logs in console. If not set, show from all.
-..note:
+.. note::
`rdzv_timeout` is a legacy argument that will be removed in future.
Set the timeout via `rdzv_configs['timeout']`
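A hedged configuration sketch following that advice; the field values are placeholders and the exact set of LaunchConfig fields varies across releases:

    from torch.distributed.launcher.api import LaunchConfig

    config = LaunchConfig(
        min_nodes=1,
        max_nodes=1,
        nproc_per_node=2,
        rdzv_backend="c10d",
        rdzv_endpoint="localhost:29400",
        rdzv_configs={"timeout": 900},  # preferred over the legacy rdzv_timeout argument
    )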

View File

@@ -37,9 +37,10 @@ def _recursive_copy_to_device(
Non-tensor values are passed as-is in the result.
-.. note: These are all copies, so if there are two objects that reference
-the same object, then after this call, there will be two different objects
-referenced on the device.
+.. note::
+These are all copies, so if there are two objects that reference
+the same object, then after this call, there will be two different objects
+referenced on the device.
"""
if isinstance(value, torch.Tensor):
return value.to(device, non_blocking=non_blocking)
@@ -1100,7 +1101,7 @@ class ZeroRedundancyOptimizer(Optimizer, Joinable):
Returns:
Optional loss depending on the underlying local optimizer.
-.. note: Any extra parameters are passed to the base optimizer as-is.
+.. note:: Any extra parameters are passed to the base optimizer as-is.
"""
if self._overlap_with_ddp:
logger.warning(
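For context, a hedged usage sketch of the optimizer whose step() is documented above, assuming torch.distributed is already initialized and model/inputs are placeholders:

    import torch
    from torch.distributed.optim import ZeroRedundancyOptimizer

    opt = ZeroRedundancyOptimizer(
        model.parameters(),
        optimizer_class=torch.optim.Adam,
        lr=1e-3,                 # extra kwargs like lr are forwarded to the base optimizer as-is
    )
    loss = model(inputs).sum()   # `inputs` is a placeholder batch
    loss.backward()
    opt.step()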

View File

@@ -226,7 +226,7 @@ class CommDebugMode(TorchDispatchMode):
functional collectives within its context. It does this using a
``TorchDispatchMode``.
-.. note: Not all collectives are supported yet.
+.. note:: Not all collectives are supported yet.
Example usage
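The docstring's example is cut off here. A hedged sketch of typical usage, assuming the import path in recent releases and treating model/inp as placeholder DTensor-based computation:

    from torch.distributed.tensor.debug import CommDebugMode

    comm_mode = CommDebugMode()
    with comm_mode:
        out = model(inp)                     # placeholder distributed computation
    print(comm_mode.get_total_counts())      # total number of collectives dispatched
    print(comm_mode.get_comm_counts())       # per-collective breakdown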

View File

@@ -277,7 +277,7 @@ def stream(stream: Optional["torch.mtia.Stream"]) -> StreamContext:
Arguments:
stream (Stream): selected stream. This manager is a no-op if it's
``None``.
-..Note:: In eager mode stream is of type Stream class while in JIT it doesn't support torch.mtia.stream
+.. note:: In eager mode stream is of type Stream class while in JIT it doesn't support torch.mtia.stream
"""
return StreamContext(stream)

View File

@@ -50,7 +50,7 @@ def consume_prefix_in_state_dict_if_present(
) -> None:
r"""Strip the prefix in state_dict in place, if any.
-..note::
+.. note::
Given a `state_dict` from a DP/DDP model, a local model can load it by applying
`consume_prefix_in_state_dict_if_present(state_dict, "module.")` before calling
:meth:`torch.nn.Module.load_state_dict`.
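A small, self-contained sketch of that workflow:

    import torch
    from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present

    local_model = torch.nn.Linear(4, 2)
    # Simulate a checkpoint saved from a DDP-wrapped model: keys carry a "module." prefix.
    ddp_style_sd = {"module." + k: v for k, v in local_model.state_dict().items()}

    # Strips the prefix in place (a no-op if the prefix is absent).
    consume_prefix_in_state_dict_if_present(ddp_style_sd, "module.")
    local_model.load_state_dict(ddp_style_sd)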