diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index c90dba2220c5..9fef00f5a809 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -239,7 +239,9 @@ else: ) return not_none(device_mesh.mesh_dim_names.index(mesh_dim_name)) - def _get_slice_mesh_layout(self, device_mesh, mesh_dim_names) -> _MeshLayout: + def _get_slice_mesh_layout( + self, device_mesh: "DeviceMesh", mesh_dim_names: tuple[str, ...] + ) -> _MeshLayout: """ Validate whether the mesh_dim_names is valid for slicing the given device_mesh. If valid, return dim indexes of the slice mesh in the device mesh. @@ -266,7 +268,7 @@ else: else {} ) valid_mesh_dim_names = [ - *device_mesh.mesh_dim_names, + *not_none(device_mesh.mesh_dim_names), *flatten_name_to_root_layout, ] @@ -281,11 +283,17 @@ else: layout_sliced = [] for name in mesh_dim_names: - if name in device_mesh.mesh_dim_names: + if name in not_none(device_mesh.mesh_dim_names): layout_sliced.append( - device_mesh._layout[device_mesh.mesh_dim_names.index(name)] + device_mesh._layout[ + not_none(device_mesh.mesh_dim_names).index(name) + ] ) elif name in flatten_name_to_root_layout: + warnings.warn( + "Slicing a flattened dim from root mesh will be deprecated in PT 2.11. " + "Users need to bookkeep the flattened mesh directly. " + ) layout_sliced.append(flatten_name_to_root_layout[name]) sliced_sizes = tuple(l.sizes for l in layout_sliced)