PyProcessGroup: support rank, world size, group name/desc overrides (#141529)

This improves `PyProcessGroup` so that the rank, world size, and group name/desc methods can be overridden from Python. These overrides will be needed to support resizable process groups in torchft.

This also includes some small fixes in test_c10d_pypg.py to use threads instead of processes, which speeds up test execution by ~10x.

Test plan:

```
pytest test/distributed/test_c10d_pypg.py
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/141529
Approved by: https://github.com/fegin
This commit is contained in:
Tristan Rice
2024-11-26 20:56:54 +00:00
committed by PyTorch MergeBot
parent 5696df439b
commit 9f4f061f89
4 changed files with 99 additions and 22 deletions

View File

@ -125,11 +125,11 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
int size);
~ProcessGroup() override;
int getRank() const {
virtual int getRank() const {
return rank_;
}
int getSize() const {
virtual int getSize() const {
return size_;
}
@ -863,10 +863,10 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder {
return getDefaultBackend()->hasHooks();
}
const std::string& getGroupName() const;
void setGroupName(const std::string& name);
const std::string& getGroupDesc() const;
void setGroupDesc(const std::string& name);
virtual const std::string& getGroupName() const;
virtual void setGroupName(const std::string& name);
virtual const std::string& getGroupDesc() const;
virtual void setGroupDesc(const std::string& name);
void enableCollectivesTiming();
void release_resources() override;