Compare commits

...

7 Commits

Author SHA1 Message Date
23bb7579a7 Fix package name when getting metadata
The package is always called "nvidia-cutlass"; only the import name is
either `cutlass` or `cutlass_cppgen`.
2025-11-10 14:29:57 +01:00
466ced1424 Use cutlass as fallback for cutlass_cppgen import
The package was renamed in 4.2.1, but allow for the older package name.
2025-11-10 14:29:57 +01:00
d5b087b64b Return cloned paths from _clone_cutlass_paths
The folders are explicitly copied to the build dir and hence should be used from there.
2025-11-10 13:19:54 +01:00
4126e88c2d Use vendored packaging 2025-11-10 13:19:54 +01:00
e61d1a5351 Use importlib.metadata, packaging.Version and log exception
Also fix comment.
2025-11-10 13:19:54 +01:00
ca774114ee Ensure cutlass is initialized when include paths are determined
`try_import_cutlass` might set up a different CUTLASS include path
depending on the Python package installed.
As this is used in other places where CUTLASS code is generated, the same
path must be used for include paths.

Otherwise, compilation in subprocesses that might not have the
(modified) CUTLASS path set up will fail, e.g. the benchmarking code
for algorithm selection.
2025-11-10 13:19:54 +01:00
2436a4c7c3 Revert "[cutlass backend] delete pip cutlass path since nvidia stops supporting nvidia-cutlass (#156651)"
This reverts commit f97f03c7efcf2b7a45384b9094eb6be4cb419546.
2025-11-10 13:19:53 +01:00
2 changed files with 50 additions and 8 deletions

View File

@ -34,7 +34,7 @@ from pathlib import Path
from tempfile import _TemporaryFileWrapper
from time import time, time_ns
from types import ModuleType
from typing import Any, cast, Generic, NoReturn, TYPE_CHECKING, TypeVar, Union
from typing import Any, cast, Generic, NoReturn, Optional, TYPE_CHECKING, TypeVar, Union
from typing_extensions import override, Self
import torch
@ -3753,13 +3753,15 @@ def _cuda_compiler() -> str | None:
return "nvcc"
def _cutlass_path() -> str:
def _cutlass_path() -> Optional[str]:
if config.is_fbcode():
from libfb.py import parutil
return parutil.get_dir_path("cutlass-4-headers")
else:
return config.cuda.cutlass_dir
from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass
return config.cuda.cutlass_dir if try_import_cutlass() else None
def _cutlass_paths() -> list[str]:
@ -3772,20 +3774,25 @@ def _cutlass_paths() -> list[str]:
def _clone_cutlass_paths(build_root: str) -> list[str]:
paths = _cutlass_paths()
cutlass_root = _cutlass_path()
if cutlass_root is None:
return []
paths = []
for path in _cutlass_paths():
old_path = os.path.join(cutlass_root, path)
new_path = os.path.join(build_root, path)
shutil.copytree(old_path, new_path, dirs_exist_ok=True)
paths.append(new_path)
return paths
def _cutlass_include_paths() -> list[str]:
cutlass_path = _cutlass_path()
cutlass_root = _cutlass_path()
if cutlass_root is None:
return []
return [
# Use realpath to get canonical absolute paths, in order not to mess up cache keys
os.path.realpath(os.path.join(cutlass_path, path))
os.path.realpath(os.path.join(cutlass_root, path))
for path in _cutlass_paths()
]

View File

@ -1,6 +1,7 @@
# mypy: allow-untyped-defs
import atexit
import functools
import importlib.metadata
import logging
import os
import shutil
@ -16,6 +17,7 @@ import sympy
import torch
from torch._inductor.runtime.runtime_utils import dynamo_timed
from torch._inductor.utils import clear_on_fresh_cache
from torch._vendor.packaging.version import Version
from torch.utils._ordered_set import OrderedSet
from ... import config
@ -41,7 +43,10 @@ def move_cutlass_compiled_cache() -> None:
if not try_import_cutlass.cache_info().currsize > 0:
return
import cutlass_cppgen # type: ignore[import-not-found]
try:
import cutlass_cppgen # type: ignore[import-not-found]
except ImportError:
import cutlass as cutlass_cppgen # type: ignore[import-not-found]
# Check if the CACHE_FILE attribute exists in cutlass_cppgen and if the file exists
if not hasattr(cutlass_cppgen, "CACHE_FILE") or not os.path.exists(
@ -71,7 +76,9 @@ def try_import_cutlass() -> bool:
"""
We want to support three ways of passing in CUTLASS:
1. fbcode, handled by the internal build system.
2. User specifies cutlass_dir. The default is ../third_party/cutlass/,
2. pip install nvidia-cutlass, which provides the cutlass_library package
and the header files in the cutlass_library/source directory.
3. User specifies cutlass_dir. The default is ../third_party/cutlass/,
which is the directory when developers build from source.
"""
if config.is_fbcode():
@ -87,6 +94,34 @@ def try_import_cutlass() -> bool:
return True
try:
cutlass_version = Version(importlib.metadata.version("nvidia-cutlass"))
if cutlass_version < Version("3.7"):
log.warning("CUTLASS version < 3.7 is not recommended.")
import cutlass_library # type: ignore[import-not-found] # noqa: F811
log.debug(
"Found cutlass_library in python search path, overriding config.cuda.cutlass_dir"
)
cutlass_library_dir = os.path.dirname(cutlass_library.__file__)
assert os.path.isdir(cutlass_library_dir), (
f"{cutlass_library_dir} is not a directory"
)
config.cuda.cutlass_dir = os.path.abspath(
os.path.join(
cutlass_library_dir,
"source",
)
)
return True
except (ModuleNotFoundError, importlib.metadata.PackageNotFoundError):
log.debug(
"cutlass_library not found in sys.path, trying to import from config.cuda.cutlass_dir",
exc_info=True,
)
# Copy CUTLASS python scripts to a temp dir and add the temp dir to Python search path.
# This is a temporary hack to avoid CUTLASS module naming conflicts.
# TODO(ipiszy): remove this hack when CUTLASS solves Python scripts packaging structure issues.