From 8817e5ac80ba37edadaadd96eafc2832a03b48e6 Mon Sep 17 00:00:00 2001
From: Jane Xu
Date: Tue, 20 May 2025 13:27:28 -0700
Subject: [PATCH] Render Example: and not Example:: in docs (#153978)

Everything here is a grep except the changes in tools/autograd/load_derivatives.py, which I manually corrected.

The correct notation is:

```
Example::

    >>> ...
```

It is common and wrong to have:

```
Example::
    >>> ...
```

In the wrong example, we get these pesky double colons:

![image](https://github.com/user-attachments/assets/20ffd349-68bb-4552-966c-e23923350476)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153978
Approved by: https://github.com/soulitzer, https://github.com/malfet
---
 tools/autograd/load_derivatives.py   |  7 ++++---
 torch/_custom_ops.py                 |  2 ++
 torch/_functorch/eager_transforms.py |  1 +
 torch/_size_docs.py                  |  1 +
 torch/_tensor_docs.py                |  5 +++++
 torch/_torch_docs.py                 | 12 ++++++++++++
 torch/autograd/function.py           |  3 +++
 torch/distributed/device_mesh.py     |  4 ++++
 torch/futures/__init__.py            |  4 ++++
 torch/library.py                     |  4 ++++
 torch/special/__init__.py            | 11 +++++++++++
 11 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py
index 9d600a815758..6df4d389fa55 100644
--- a/tools/autograd/load_derivatives.py
+++ b/tools/autograd/load_derivatives.py
@@ -986,7 +986,7 @@ def saved_variables(
 
 
 def _create_op_prefix(name: str) -> str:
-    """Takes a native function name converts to a op prefix name.
+    r"""Takes a native function name converts to an op prefix name.
 
     Note that the "name" parameter must be the native function name
     without the optional variant suffix, so "add" instead of
@@ -995,8 +995,9 @@ def _create_op_prefix(name: str) -> str:
     OP names correspond to classes, hence the change to title case.
 
     Example::
-    >>> _create_op_prefix("add")
-    'AddBackward'
+
+    >>> _create_op_prefix("add")
+    'AddBackward'
     """
     camel_case = "".join([p.title() for p in name.split("_")])
     return (camel_case + "Backward").replace("ForwardBackward", "Backward")
diff --git a/torch/_custom_ops.py b/torch/_custom_ops.py
index 25140653b996..5203da640fa5 100644
--- a/torch/_custom_ops.py
+++ b/torch/_custom_ops.py
@@ -52,6 +52,7 @@ def custom_op(qualname, func_or_schema=None):
             schema string.
 
     Example::
+
         >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
         >>> import torch
         >>> import numpy as np
@@ -134,6 +135,7 @@ def impl(qualname, *, device_types=("cpu", "cuda"), func=None):
         device_types (str or Iterable[str]): the device type(s) to register the function for.
 
     Example::
+
         >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
         >>> import torch
         >>> import numpy as np
diff --git a/torch/_functorch/eager_transforms.py b/torch/_functorch/eager_transforms.py
index f058c215c39e..99675183a45f 100644
--- a/torch/_functorch/eager_transforms.py
+++ b/torch/_functorch/eager_transforms.py
@@ -1701,6 +1701,7 @@ def linearize(func: Callable, *primals) -> tuple[Any, Callable]:
         with a single evaluation.
 
     Example::
+
         >>> import torch
         >>> from torch.func import linearize
         >>> def fn(x):
diff --git a/torch/_size_docs.py b/torch/_size_docs.py
index 4e79e8023f5b..e30240a1e6f6 100644
--- a/torch/_size_docs.py
+++ b/torch/_size_docs.py
@@ -19,6 +19,7 @@ More formally, for a tensor ``x = tensor.ones(10, 10)`` with size ``s = torch.Si
 ``x.numel() == x.size().numel() == s.numel() == 100`` holds true.
 
 Example::
+
     >>> x=torch.ones(10, 10)
     >>> s=x.size()
     >>> s
diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py
index bee7f7385fb0..4b31a9de93b7 100644
--- a/torch/_tensor_docs.py
+++ b/torch/_tensor_docs.py
@@ -2474,6 +2474,7 @@ Args:
     value (float): the value to fill with
 
 Example::
+
     >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float)
     >>> index = torch.tensor([0, 2])
     >>> x.index_fill_(1, index, -1)
@@ -6871,6 +6872,7 @@ The returned tensor and :attr:`self` share the same underlying storage.
 Returns :attr:`self` if :attr:`self` is a real-valued tensor tensor.
 
 Example::
+
     >>> x=torch.randn(4, dtype=torch.cfloat)
     >>> x
     tensor([(0.3100+0.3553j), (-0.5445-0.7896j), (-1.6492-0.0633j), (-0.0638-0.8119j)])
@@ -6890,6 +6892,7 @@ The returned tensor and :attr:`self` share the same underlying storage.
 :func:`imag` is only supported for tensors with complex dtypes.
 
 Example::
+
     >>> x=torch.randn(4, dtype=torch.cfloat)
     >>> x
     tensor([(0.3100+0.3553j), (-0.5445-0.7896j), (-1.6492-0.0633j), (-0.0638-0.8119j)])
@@ -6923,6 +6926,7 @@ matrix multiplication, it is necessary to use ``int32`` indexing in order
 to avoid downcasting and potentially losing information.
 
 Example::
+
     >>> csr = torch.eye(5,5).to_sparse_csr()
     >>> csr.crow_indices()
     tensor([0, 1, 2, 3, 4, 5], dtype=torch.int32)
@@ -6943,6 +6947,7 @@ matrix multiplication, it is necessary to use ``int32`` indexing in order
 to avoid downcasting and potentially losing information.
 
 Example::
+
     >>> csr = torch.eye(5,5).to_sparse_csr()
     >>> csr.col_indices()
     tensor([0, 1, 2, 3, 4], dtype=torch.int32)
diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py
index 417f2ea0b16e..34ffdb313c4a 100644
--- a/torch/_torch_docs.py
+++ b/torch/_torch_docs.py
@@ -611,6 +611,7 @@ Args:
     {input}
 
 Example::
+
     >>> x = torch.arange(4, dtype=torch.float)
     >>> A = torch.complex(x, x).reshape(2, 2)
     >>> A
@@ -2050,6 +2051,7 @@ Args:
     indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`.
 
 Example::
+
     >>> t = torch.arange(16.0).reshape(4,4)
     >>> t
     tensor([[ 0., 1., 2., 3.],
@@ -2099,6 +2101,7 @@ Args:
     indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`.
 
 Example::
+
     >>> t = torch.arange(16.0).reshape(4,4)
     >>> t
     tensor([[ 0., 1., 2., 3.],
@@ -2140,6 +2143,7 @@ Args:
     indices_or_sections (int or list or tuple of ints): See argument in :func:`torch.tensor_split`.
 
 Example::
+
     >>> t = torch.arange(16.0).reshape(2, 2, 4)
     >>> t
     tensor([[[ 0., 1., 2., 3.],
@@ -2295,6 +2299,7 @@ Returns:
     :func:`torch.corrcoef` normalized covariance matrix.
 
 Example::
+
     >>> x = torch.tensor([[0, 2], [1, 1], [2, 0]]).T
     >>> x
     tensor([[0, 1, 2],
@@ -4477,6 +4482,7 @@ Keyword args:
     {pin_memory}
 
 Example::
+
     >>> t = torch.randn(2, 5, dtype=torch.float64)
     >>> t.numpy().tofile('storage.pt')
     >>> t_mapped = torch.from_file('storage.pt', shared=False, size=10, dtype=torch.float64)
@@ -5148,6 +5154,7 @@ Returns:
     bin_edges(Tensor[]): sequence of N 1D Tensors containing the bin edges.
 
 Example::
+
     >>> torch.histogramdd(torch.tensor([[0., 1.], [1., 0.], [2., 0.], [2., 2.]]), bins=[3, 3],
     ...                   weight=torch.tensor([1., 2., 4., 8.]))
     torch.return_types.histogramdd(
@@ -10042,6 +10049,7 @@ Keyword args:
     {check_invariants}
 
 Example::
+
     >>> compressed_indices = [0, 2, 4]
     >>> plain_indices = [0, 1, 0, 1]
     >>> values = [1, 2, 3, 4]
@@ -10102,6 +10110,7 @@ Keyword args:
     {check_invariants}
 
 Example::
+
     >>> crow_indices = [0, 2, 4]
     >>> col_indices = [0, 1, 0, 1]
     >>> values = [1, 2, 3, 4]
@@ -10164,6 +10173,7 @@ Keyword args:
     {check_invariants}
 
 Example::
+
     >>> ccol_indices = [0, 2, 4]
     >>> row_indices = [0, 1, 0, 1]
     >>> values = [1, 2, 3, 4]
@@ -10228,6 +10238,7 @@ Keyword args:
     {check_invariants}
 
 Example::
+
     >>> crow_indices = [0, 1, 2]
     >>> col_indices = [0, 1]
     >>> values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
@@ -10294,6 +10305,7 @@ Keyword args:
     {check_invariants}
 
 Example::
+
     >>> ccol_indices = [0, 1, 2]
     >>> row_indices = [0, 1]
     >>> values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
diff --git a/torch/autograd/function.py b/torch/autograd/function.py
index 2086d8b75a56..284068989261 100644
--- a/torch/autograd/function.py
+++ b/torch/autograd/function.py
@@ -64,6 +64,7 @@ class FunctionCtx:
         See :ref:`extending-autograd` for more details on how to use this method.
 
         Example::
+
             >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
             >>> class Func(Function):
             >>>     @staticmethod
@@ -107,6 +108,7 @@ class FunctionCtx:
         See :ref:`extending-autograd` for more details on how to use this method.
 
         Example::
+
             >>> # xdoctest: +SKIP
             >>> class Func(torch.autograd.Function):
             >>>     @staticmethod
@@ -234,6 +236,7 @@ class FunctionCtx:
         prior to calling the :func:`backward` and :func:`jvp` methods.
 
         Example::
+
             >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_AUTOGRAD)
             >>> class SimpleFunc(Function):
             >>>     @staticmethod
diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py
index 224323ced2fb..55c6ff831464 100644
--- a/torch/distributed/device_mesh.py
+++ b/torch/distributed/device_mesh.py
@@ -417,6 +417,7 @@ else:
             of mesh reduces across rows (0, 1, 2, 3) and (4, 5, 6, 7).
 
         Example::
+
             >>> # xdoctest: +SKIP("no rank")
             >>> from torch.distributed.device_mesh import DeviceMesh
             >>>
@@ -700,6 +701,7 @@ else:
             Calling mesh_3d["cp", "dp"] on rank 2, 3, 6, 7 returns a 2D submesh of DeviceMesh:([[2, 6], [3, 7]]).
 
         Example::
+
             >>> # xdoctest: +SKIP("no rank")
             >>> from torch.distributed.device_mesh import DeviceMesh
             >>>
@@ -926,6 +928,7 @@ else:
             Calling mesh_2d.get_local_rank(mesh_dim=1) on rank 3, 7 would return 3.
 
         Example::
+
             >>> # xdoctest: +SKIP("no rank")
             >>> from torch.distributed.device_mesh import DeviceMesh
             >>>
@@ -1008,6 +1011,7 @@ else:
             DeviceMesh: A :class:`DeviceMesh` object representing the device layout.
 
         Example::
+
             >>> # xdoctest: +SKIP("no rank")
             >>> from torch.distributed.device_mesh import init_device_mesh
             >>>
diff --git a/torch/futures/__init__.py b/torch/futures/__init__.py
index 236165f61efa..dcca39d06a4e 100644
--- a/torch/futures/__init__.py
+++ b/torch/futures/__init__.py
@@ -149,6 +149,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
             on those futures independently.
 
         Example::
+
             >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
             >>> def callback(fut):
             ...     print(f"RPC return value is {fut.wait()}.")
@@ -197,6 +198,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
             for handling completion/waiting on those futures independently.
 
         Example::
+
             >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES)
             >>> def callback(fut):
print("This will run after the future has finished.") @@ -230,6 +232,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta): result (object): the result object of this ``Future``. Example:: + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES) >>> import threading >>> import time @@ -259,6 +262,7 @@ class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta): result (BaseException): the exception for this ``Future``. Example:: + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_FUTURES) >>> fut = torch.futures.Future() >>> fut.set_exception(ValueError("foo")) diff --git a/torch/library.py b/torch/library.py index 6c2f625a710c..b0934a251116 100644 --- a/torch/library.py +++ b/torch/library.py @@ -152,6 +152,7 @@ class Library: name of the operator as inferred from the schema. Example:: + >>> my_lib = Library("mylib", "DEF") >>> my_lib.define("sum(Tensor self) -> Tensor") """ @@ -254,6 +255,7 @@ class Library: the dispatch key that the library was created with. Example:: + >>> my_lib = Library("aten", "IMPL") >>> my_lib._impl_with_aoti_compile("div.Tensor", "CPU") """ @@ -316,6 +318,7 @@ class Library: registered. Example:: + >>> my_lib = Library("aten", "IMPL") >>> def div_cpu(self, other): >>> return self * (1 / other) @@ -399,6 +402,7 @@ class Library: to :attr:`fn` when calling. This should be used to create the appropriate keyset for redispatch calls. Example:: + >>> my_lib = Library("_", "IMPL") >>> def fallback_kernel(op, *args, **kwargs): >>> # Handle all autocast ops generically diff --git a/torch/special/__init__.py b/torch/special/__init__.py index 9f872c93a4f3..9eb3fefefdea 100644 --- a/torch/special/__init__.py +++ b/torch/special/__init__.py @@ -88,6 +88,7 @@ Keyword args: out (Tensor, optional): the output tensor. Example:: + >>> a = torch.arange(-0.5, 1, 0.5) >>> a tensor([-0.5000, 0.0000, 0.5000]) @@ -189,6 +190,7 @@ Keyword args: {out} Example:: + >>> a = torch.tensor([1, 0.5]) >>> torch.special.polygamma(1, a) tensor([1.64493, 4.9348]) @@ -592,6 +594,7 @@ Keyword args: {out} Example:: + >>> torch.special.i0e(torch.arange(5, dtype=torch.float32)) tensor([1.0000, 0.4658, 0.3085, 0.2430, 0.2070]) """.format( @@ -618,6 +621,7 @@ Keyword args: {out} Example:: + >>> torch.special.i1(torch.arange(5, dtype=torch.float32)) tensor([0.0000, 0.5652, 1.5906, 3.9534, 9.7595]) """.format( @@ -645,6 +649,7 @@ Keyword args: {out} Example:: + >>> torch.special.i1e(torch.arange(5, dtype=torch.float32)) tensor([0.0000, 0.2079, 0.2153, 0.1968, 0.1788]) """.format( @@ -671,6 +676,7 @@ Keyword args: {out} Example:: + >>> torch.special.ndtr(torch.tensor([-3., -2, -1, 0, 1, 2, 3])) tensor([0.0013, 0.0228, 0.1587, 0.5000, 0.8413, 0.9772, 0.9987]) """.format( @@ -700,6 +706,7 @@ Keyword args: {out} Example:: + >>> torch.special.ndtri(torch.tensor([0, 0.25, 0.5, 0.75, 1])) tensor([ -inf, -0.6745, 0.0000, 0.6745, inf]) """.format( @@ -726,6 +733,7 @@ Keyword args: {out} Example:: + >>> torch.special.log_ndtr(torch.tensor([-3., -2, -1, 0, 1, 2, 3])) tensor([-6.6077 -3.7832 -1.841 -0.6931 -0.1728 -0.023 -0.0014]) """.format( @@ -765,6 +773,7 @@ Keyword args: {out} Example:: + >>> t = torch.randn(4) >>> t tensor([ 0.2252, -0.2948, 1.0267, -1.1566]) @@ -838,6 +847,7 @@ Args: is performed. This is useful for preventing data type overflows. Default: None. Example:: + >>> t = torch.ones(2, 2) >>> torch.special.log_softmax(t, 0) tensor([[-0.6931, -0.6931], @@ -868,6 +878,7 @@ Keyword args: {out} Example:: + >>> x = torch.tensor([2., 4.]) >>> torch.special.zeta(x, 1) tensor([1.6449, 1.0823])