[DCP] Returns a copy of the state dict in `_copy_state_dict` (#123567)

I found that returning the copy is actually useful in situations where you might do something like:

```
ret = _copy_state_dict(obj, cache)
ret.update(some_other_values)
```

and would like `cache` not to change structure as a result of `ret.update(some_other_values)`. Open to some notes here; not returning a copy might force the user to make additional copies for this case.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/123567
Approved by: https://github.com/wz337
[nccl-pg] print broadcast ncclunique id duration (#123963 )
2025-10-26 08:34:52 +08:00 · 2024-04-18 13:20:20 -07:00 · 2024-04-16 17:03:25 -07:00
2 changed files with 23 additions and 4 deletions
--- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp
@ -1976,7 +1976,16 @@ std::vector<std::shared_ptr<NCCLComm>>& ProcessGroupNCCL::getNCCLComm(
  // For point-to-point communication on the same process, don't need broadcast.
  if (!isSendRecvSelf) {
    // Broadcast so that each process can have a unique NCCL ID
+    auto timeStarted = std::chrono::steady_clock::now();
    broadcastUniqueNCCLID(&ncclID, singleP2POp, devicesKey, p2pRank);
+    auto timerDeltaMs =
+        std::chrono::duration_cast<std::chrono::duration<double>>(
+            std::chrono::steady_clock::now() - timeStarted)
+            .count() *
+        1000;
+    LOG(INFO) << logPrefix()
+              << "ProcessGroupNCCL broadcast unique ID through store took "
+              << timerDeltaMs << " ms";
  }

  at::cuda::OptionalCUDAGuard gpuGuard;
--- a/torch/distributed/_state_dict_utils.py
+++ b/torch/distributed/_state_dict_utils.py
@ -117,7 +117,12 @@ def _iterate_state_dict(
            not isinstance(companion_obj, dict)
            or set(companion_obj.keys()) != set(iter_object.keys())
        ):
-            raise CompanionMismatch()
+            msg = (
+                ""
+                if isinstance(companion_obj, dict)
+                else f"{set(companion_obj.keys())=} {set(iter_object.keys())=}"
+            )
+            raise CompanionMismatch(msg)

        ret = {
            key: _iterate_state_dict(
@ -312,10 +317,12 @@ def _copy_state_dict(
    state_dict: Dict[str, Any],
    copy_state_dict: Dict[str, Any],
    non_blocking: bool = False,
-):
+) -> Dict[str, Any]:
    """
    Copies all tensors in a given state dict into a different state_dict with the
-    same structure.
+    same structure. Additionally, a copied state dict with the same value references
+    is returned. Editing the keys on this state dict will not affect the
+    passed in copy_state_dict (but the value references are the same).

    .. warning::
        It is expected by this function that state_dict and copy_state_dict share
@ -331,9 +338,12 @@ def _copy_state_dict(
            The state dict we are copying into. This state_dict must have exactly
             the same structure as the source `state_dict`.
        non_blocking: (bool): Whether copy ops should be performed asynchronously
+
+    Returns:
+        State Dict copy
    """

-    _iterate_state_dict(
+    return _iterate_state_dict(
        state_dict,
        _identity_func,
        _identity_func,