mirror of
				https://github.com/vllm-project/vllm.git
				synced 2025-10-20 23:03:52 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			766 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			766 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # ruff: noqa
 | |
| # code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py
 | |
| 
 | |
| import datetime
 | |
| import locale
 | |
| import os
 | |
| import re
 | |
| import subprocess
 | |
| import sys
 | |
# NOTE: the upstream PyTorch file had to stay Python 2 compliant, unlike the
# rest of PyTorch; this copy already uses Python 3 only syntax (f-strings).
# This script outputs relevant system environment info.
# Run it with `python collect_env.py` (upstream: `python -m torch.utils.collect_env`).
 | |
| from collections import namedtuple
 | |
| 
 | |
| from vllm.envs import environment_variables
 | |
| 
 | |
| try:
 | |
|     import torch
 | |
|     TORCH_AVAILABLE = True
 | |
| except (ImportError, NameError, AttributeError, OSError):
 | |
|     TORCH_AVAILABLE = False
 | |
| 
 | |
| # System Environment Information
 | |
# Immutable record holding every value that ends up in the environment report.
SystemEnv = namedtuple(
    'SystemEnv',
    (
        # Fields shared with the PyTorch collector this file was borrowed from.
        'torch_version',
        'is_debug_build',
        'cuda_compiled_version',
        'gcc_version',
        'clang_version',
        'cmake_version',
        'os',
        'libc_version',
        'python_version',
        'python_platform',
        'is_cuda_available',
        'cuda_runtime_version',
        'cuda_module_loading',
        'nvidia_driver_version',
        'nvidia_gpu_models',
        'cudnn_version',
        'pip_version',  # 'pip' or 'pip3'
        'pip_packages',
        'conda_packages',
        'hip_compiled_version',
        'hip_runtime_version',
        'miopen_runtime_version',
        'caching_allocator_config',
        'is_xnnpack_available',
        'cpu_info',
        # vllm specific fields.
        'rocm_version',
        'neuron_sdk_version',
        'vllm_version',
        'vllm_build_flags',
        'gpu_topo',
        'env_vars',
    ),
)
 | |
| 
 | |
# Substrings that mark a `conda list` line as worth reporting.
DEFAULT_CONDA_PATTERNS = {
    "cudatoolkit",
    "magma",
    "mkl",
    "nccl",
    "numpy",
    "nvidia",
    "optree",
    "pynvml",
    "soumith",
    "torch",
    "transformers",
    "triton",
    "zmq",
}
 | |
| 
 | |
# Substrings that mark a `pip list` line as worth reporting.
DEFAULT_PIP_PATTERNS = {
    "flake8",
    "mypy",
    "nccl",
    "numpy",
    "nvidia",
    "onnx",
    "optree",
    "pynvml",
    "torch",
    "transformers",
    "triton",
    "zmq",
}
 | |
| 
 | |
| 
 | |
def run(command):
    """Execute `command` and return (return-code, stdout, stderr).

    A command given as a plain `str` is run through the shell; anything else
    (e.g. an argument list) is executed directly. Both streams are decoded and
    stripped of surrounding whitespace.
    """
    use_shell = type(command) is str
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=use_shell)
    raw_output, raw_err = process.communicate()
    exit_code = process.returncode
    # Windows consoles use the OEM code page; elsewhere follow the locale.
    encoding = ('oem' if get_platform() == 'win32' else
                locale.getpreferredencoding())
    stdout_text = raw_output.decode(encoding)
    stderr_text = raw_err.decode(encoding)
    return exit_code, stdout_text.strip(), stderr_text.strip()
 | |
| 
 | |
| 
 | |
def run_and_read_all(run_lambda, command):
    """Run `command` via `run_lambda`; return its full stdout, or None on a non-zero rc."""
    status, stdout, _ = run_lambda(command)
    return stdout if status == 0 else None
 | |
| 
 | |
| 
 | |
def run_and_parse_first_match(run_lambda, command, regex):
    """Run `command` via `run_lambda`; return group 1 of the first `regex` match.

    Returns None when the command exits non-zero or nothing matches.
    """
    status, stdout, _ = run_lambda(command)
    found = re.search(regex, stdout) if status == 0 else None
    return None if found is None else found.group(1)
 | |
| 
 | |
| 
 | |
def run_and_return_first_line(run_lambda, command):
    """Run `command` via `run_lambda`; return the first stdout line, or None on a non-zero rc."""
    status, stdout, _ = run_lambda(command)
    if status == 0:
        first_line, _, _ = stdout.partition('\n')
        return first_line
    return None
 | |
| 
 | |
| 
 | |
def get_conda_packages(run_lambda, patterns=None):
    """Return the `conda list` lines that mention any of `patterns`.

    Comment lines (starting with '#') are dropped. Returns None when the
    conda command exits non-zero. `patterns` defaults to
    DEFAULT_CONDA_PATTERNS.
    """
    if patterns is None:
        patterns = DEFAULT_CONDA_PATTERNS
    # Prefer the executable exported by an activated conda environment.
    conda_exe = os.environ.get('CONDA_EXE', 'conda')
    listing = run_and_read_all(run_lambda, "{} list".format(conda_exe))
    if listing is None:
        return listing

    relevant = []
    for entry in listing.splitlines():
        if entry.startswith("#"):
            continue
        if any(name in entry for name in patterns):
            relevant.append(entry)
    return "\n".join(relevant)
 | |
| 
 | |
| 
 | |
def get_gcc_version(run_lambda):
    """Return the text after 'gcc ' in `gcc --version` output, or None if unavailable."""
    version_pattern = r'gcc (.*)'
    return run_and_parse_first_match(run_lambda, 'gcc --version',
                                     version_pattern)
 | |
| 
 | |
| 
 | |
def get_clang_version(run_lambda):
    """Return the text after 'clang version ' in `clang --version` output, or None."""
    version_pattern = r'clang version (.*)'
    return run_and_parse_first_match(run_lambda, 'clang --version',
                                     version_pattern)
 | |
| 
 | |
| 
 | |
def get_cmake_version(run_lambda):
    """Return the text after 'cmake ' in `cmake --version` output, or None."""
    version_pattern = r'cmake (.*)'
    return run_and_parse_first_match(run_lambda, 'cmake --version',
                                     version_pattern)
 | |
| 
 | |
| 
 | |
def get_nvidia_driver_version(run_lambda):
    """Return the NVIDIA driver version string, or None if it cannot be parsed.

    On macOS the version is read from the CUDA kernel extension listing;
    elsewhere it is parsed from the nvidia-smi banner.
    """
    if get_platform() != 'darwin':
        command = get_nvidia_smi()
        pattern = r'Driver Version: (.*?) '
    else:
        command = 'kextstat | grep -i cuda'
        pattern = r'com[.]nvidia[.]CUDA [(](.*?)[)]'
    return run_and_parse_first_match(run_lambda, command, pattern)
 | |
| 
 | |
| 
 | |
def get_gpu_info(run_lambda):
    """Describe the available GPU(s), or return None when they cannot be queried.

    On macOS or a HIP build of torch the name of the current torch device is
    returned (with the gcnArchName suffix on HIP). Otherwise the output of
    `nvidia-smi -L` is returned with the device UUIDs removed.
    """
    query_through_torch = get_platform() == 'darwin' or (
        TORCH_AVAILABLE and hasattr(torch.version, 'hip')
        and torch.version.hip is not None)

    if not query_through_torch:
        nvidia_smi = get_nvidia_smi()
        status, listing, _ = run_lambda(nvidia_smi + ' -L')
        if status != 0:
            return None
        # Anonymize GPUs by removing their UUID
        return re.sub(r' \(UUID: .+?\)', '', listing)

    if not (TORCH_AVAILABLE and torch.cuda.is_available()):
        return None

    if torch.version.hip is None:
        arch_suffix = ""
    else:
        device_props = torch.cuda.get_device_properties(0)
        if hasattr(device_props, "gcnArchName"):
            arch_suffix = " ({})".format(device_props.gcnArchName)
        else:
            arch_suffix = "NoGCNArchNameOnOldPyTorch"
    return torch.cuda.get_device_name(None) + arch_suffix
 | |
| 
 | |
| 
 | |
def get_running_cuda_version(run_lambda):
    """Return the version that follows 'V' on the release line of `nvcc --version`, or None."""
    release_pattern = r'release .+ V(.*)'
    return run_and_parse_first_match(run_lambda, 'nvcc --version',
                                     release_pattern)
 | |
| 
 | |
| 
 | |
def get_cudnn_version(run_lambda):
    """Return the cuDNN library path(s) found on this machine, or None.

    It is hard to tell which library is actually in use, so when several
    candidates exist they are all listed. If the platform lookup yields
    nothing, the CUDNN_LIBRARY environment variable is used as a fallback.
    """
    platform_name = get_platform()
    if platform_name == 'win32':
        system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
        cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%")
        where_cmd = os.path.join(system_root, 'System32', 'where')
        cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path)
    elif platform_name == 'darwin':
        # CUDA libraries and drivers can be found in /usr/local/cuda/. See
        # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install
        # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac
        # Use CUDNN_LIBRARY when cudnn library is installed elsewhere.
        cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*'
    else:
        cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev'

    status, listing, _ = run_lambda(cudnn_cmd)

    # Return codes 0 and 1 are both tolerated (1 can mean "not found" or a
    # permission error); anything else, or empty output, triggers the fallback.
    if len(listing) == 0 or status not in (0, 1):
        override = os.environ.get('CUDNN_LIBRARY')
        if override is not None and os.path.isfile(override):
            return os.path.realpath(override)
        return None

    # Resolve symbolic links so that aliases of one library collapse together.
    resolved = {
        path
        for path in map(os.path.realpath, listing.split('\n'))
        if os.path.isfile(path)
    }
    if not resolved:
        return None

    # Sort because set iteration order is otherwise non-deterministic.
    candidates = sorted(resolved)
    if len(candidates) == 1:
        return candidates[0]
    return 'Probably one of the following:\n{}'.format('\n'.join(candidates))
 | |
| 
 | |
| 
 | |
def get_nvidia_smi():
    """Return the command used to invoke nvidia-smi.

    On Windows the first existing install location is returned wrapped in
    double quotes; otherwise (or if none exists) the bare name 'nvidia-smi'.
    """
    # Note: nvidia-smi is currently available only on Windows and Linux
    executable = 'nvidia-smi'
    if get_platform() != 'win32':
        return executable

    system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows')
    program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files')
    # The System32 location is tried before the legacy NVSMI directory.
    search_order = (
        os.path.join(system_root, 'System32', executable),
        os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI',
                     executable),
    )
    for location in search_order:
        if os.path.exists(location):
            return '"{}"'.format(location)
    return executable
 | |
| 
 | |
| 
 | |
def get_rocm_version(run_lambda):
    """Return the HIP version reported by `hipcc --version`, or None if unavailable."""
    hip_pattern = r'HIP version: (\S+)'
    return run_and_parse_first_match(run_lambda, 'hipcc --version',
                                     hip_pattern)
 | |
| 
 | |
| 
 | |
def get_neuron_sdk_version(run_lambda):
    """Probe for the Neuron SDK by running `neuron-ls`.

    Returns whatever `run_lambda` returned when its first element (the return
    code) is 0, and 'N/A' when the command fails or raises.
    """
    # NOTE(review): on success the whole run_lambda result is returned rather
    # than a parsed version string -- confirm this is intended.
    try:
        outcome = run_lambda(["neuron-ls"])
        succeeded = outcome[0] == 0
    except Exception:
        return 'N/A'
    return outcome if succeeded else 'N/A'
 | |
| 
 | |
| 
 | |
def get_vllm_version():
    """Return the installed vLLM version as a display string.

    Returns "N/A (dev)" when the package reports the placeholder version
    "dev". When the version tuple has four components (a dev build) the last
    component, minus its first character, is appended as the git sha.
    Otherwise the plain version string is returned.
    """
    from vllm import __version__, __version_tuple__

    if __version__ == "dev":
        return "N/A (dev)"

    if len(__version_tuple__) == 4:  # dev build
        # Drop the single-character prefix in front of the sha.
        git_sha = __version_tuple__[-1][1:]  # type: ignore
        # The closing parenthesis was missing in the original format string.
        return f"{__version__} (git sha: {git_sha})"

    return __version__
 | |
| 
 | |
def summarize_vllm_build_flags():
    """Summarize build-related settings read from the current environment.

    Reports TORCH_CUDA_ARCH_LIST (or 'Not Set') and whether ROCM_HOME and
    NEURON_CORES are set to a non-empty value.
    """

    def enabled_label(variable):
        return 'Enabled' if os.environ.get(variable) else 'Disabled'

    cuda_archs = os.environ.get('TORCH_CUDA_ARCH_LIST', 'Not Set')
    rocm_state = enabled_label('ROCM_HOME')
    neuron_state = enabled_label('NEURON_CORES')
    return 'CUDA Archs: {}; ROCm: {}; Neuron: {}'.format(
        cuda_archs, rocm_state, neuron_state)
 | |
| 
 | |
| 
 | |
def get_gpu_topo(run_lambda):
    """Return the GPU topology listing on Linux, or None.

    `nvidia-smi topo -m` is tried first and `rocm-smi --showtopo` second.
    Platforms other than Linux always yield None.
    """
    if get_platform() != 'linux':
        return None

    topology = run_and_read_all(run_lambda, 'nvidia-smi topo -m')
    if topology is not None:
        return topology
    return run_and_read_all(run_lambda, 'rocm-smi --showtopo')
 | |
| 
 | |
| 
 | |
| # example outputs of CPU infos
 | |
| #  * linux
 | |
| #    Architecture:            x86_64
 | |
| #      CPU op-mode(s):        32-bit, 64-bit
 | |
| #      Address sizes:         46 bits physical, 48 bits virtual
 | |
| #      Byte Order:            Little Endian
 | |
| #    CPU(s):                  128
 | |
| #      On-line CPU(s) list:   0-127
 | |
| #    Vendor ID:               GenuineIntel
 | |
| #      Model name:            Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
 | |
| #        CPU family:          6
 | |
| #        Model:               106
 | |
| #        Thread(s) per core:  2
 | |
| #        Core(s) per socket:  32
 | |
| #        Socket(s):           2
 | |
| #        Stepping:            6
 | |
| #        BogoMIPS:            5799.78
 | |
| #        Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr
 | |
| #                             sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl
 | |
| #                             xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16
 | |
| #                             pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
 | |
| #                             hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced
 | |
| #                             fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap
 | |
| #                             avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1
 | |
| #                             xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq
 | |
| #                             avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities
 | |
| #    Virtualization features:
 | |
| #      Hypervisor vendor:     KVM
 | |
| #      Virtualization type:   full
 | |
| #    Caches (sum of all):
 | |
| #      L1d:                   3 MiB (64 instances)
 | |
| #      L1i:                   2 MiB (64 instances)
 | |
| #      L2:                    80 MiB (64 instances)
 | |
| #      L3:                    108 MiB (2 instances)
 | |
| #    NUMA:
 | |
| #      NUMA node(s):          2
 | |
| #      NUMA node0 CPU(s):     0-31,64-95
 | |
| #      NUMA node1 CPU(s):     32-63,96-127
 | |
| #    Vulnerabilities:
 | |
| #      Itlb multihit:         Not affected
 | |
| #      L1tf:                  Not affected
 | |
| #      Mds:                   Not affected
 | |
| #      Meltdown:              Not affected
 | |
| #      Mmio stale data:       Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
 | |
| #      Retbleed:              Not affected
 | |
| #      Spec store bypass:     Mitigation; Speculative Store Bypass disabled via prctl and seccomp
 | |
| #      Spectre v1:            Mitigation; usercopy/swapgs barriers and __user pointer sanitization
 | |
| #      Spectre v2:            Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
 | |
| #      Srbds:                 Not affected
 | |
| #      Tsx async abort:       Not affected
 | |
| #  * win32
 | |
| #    Architecture=9
 | |
| #    CurrentClockSpeed=2900
 | |
| #    DeviceID=CPU0
 | |
| #    Family=179
 | |
| #    L2CacheSize=40960
 | |
| #    L2CacheSpeed=
 | |
| #    Manufacturer=GenuineIntel
 | |
| #    MaxClockSpeed=2900
 | |
| #    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
 | |
| #    ProcessorType=3
 | |
| #    Revision=27142
 | |
| #
 | |
| #    Architecture=9
 | |
| #    CurrentClockSpeed=2900
 | |
| #    DeviceID=CPU1
 | |
| #    Family=179
 | |
| #    L2CacheSize=40960
 | |
| #    L2CacheSpeed=
 | |
| #    Manufacturer=GenuineIntel
 | |
| #    MaxClockSpeed=2900
 | |
| #    Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz
 | |
| #    ProcessorType=3
 | |
| #    Revision=27142
 | |
| 
 | |
| 
 | |
def get_cpu_info(run_lambda):
    """Return the raw CPU description for the current platform.

    Yields the command's stdout on success and its stderr on failure. On a
    platform without a known command the result is an empty string.
    """
    platform_name = get_platform()
    if platform_name == 'linux':
        command = 'lscpu'
    elif platform_name == 'win32':
        command = 'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \
        CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE'
    elif platform_name == 'darwin':
        command = "sysctl -n machdep.cpu.brand_string"
    else:
        command = None

    if command is None:
        # Nothing was run, which counts as a success with empty output.
        return ''

    status, stdout, stderr = run_lambda(command)
    return stdout if status == 0 else stderr
 | |
| 
 | |
| 
 | |
def get_platform():
    """Return a normalized platform name.

    `sys.platform` is collapsed to 'linux', 'win32', 'cygwin' or 'darwin'
    when it starts with one of those; any other value is returned unchanged.
    """
    for known_prefix in ('linux', 'win32', 'cygwin', 'darwin'):
        if sys.platform.startswith(known_prefix):
            return known_prefix
    return sys.platform
 | |
| 
 | |
| 
 | |
def get_mac_version(run_lambda):
    """Return the first line of `sw_vers -productVersion` output, or None on failure."""
    whole_line = r'(.*)'
    return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion',
                                     whole_line)
 | |
| 
 | |
| 
 | |
def get_windows_version(run_lambda):
    """Return the Windows caption reported by wmic, or None on failure.

    `findstr /v Caption` filters the column header out of the wmic output.
    """
    system32 = os.path.join(os.environ.get('SYSTEMROOT', 'C:\\Windows'),
                            'System32')
    wmic_cmd = os.path.join(system32, 'Wbem', 'wmic')
    findstr_cmd = os.path.join(system32, 'findstr')
    query = '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)
    return run_and_read_all(run_lambda, query)
 | |
| 
 | |
| 
 | |
def get_lsb_version(run_lambda):
    """Return the 'Description' value from `lsb_release -a`, or None if unavailable."""
    description_pattern = r'Description:\t(.*)'
    return run_and_parse_first_match(run_lambda, 'lsb_release -a',
                                     description_pattern)
 | |
| 
 | |
| 
 | |
def check_release_file(run_lambda):
    """Return the PRETTY_NAME value found in /etc/*-release, or None if unavailable."""
    pretty_name_pattern = r'PRETTY_NAME="(.*)"'
    return run_and_parse_first_match(run_lambda, 'cat /etc/*-release',
                                     pretty_name_pattern)
 | |
| 
 | |
| 
 | |
def get_os(run_lambda):
    """Return a human-readable operating system description.

    Windows and Cygwin report the wmic caption. macOS and Linux results are
    suffixed with the machine architecture. Unknown platforms return the raw
    platform name. None is returned when the Windows or macOS query fails.
    """
    from platform import machine
    platform = get_platform()

    if platform in ('win32', 'cygwin'):
        return get_windows_version(run_lambda)

    if platform == 'darwin':
        mac_version = get_mac_version(run_lambda)
        if mac_version is None:
            return None
        return 'macOS {} ({})'.format(mac_version, machine())

    if platform != 'linux':
        # Unknown platform
        return platform

    # Try lsb_release first (Ubuntu/Debian based), then /etc/*-release.
    for probe in (get_lsb_version, check_release_file):
        description = probe(run_lambda)
        if description is not None:
            return '{} ({})'.format(description, machine())

    return '{} ({})'.format(platform, machine())
 | |
| 
 | |
| 
 | |
def get_python_platform():
    """Return the platform identification string from `platform.platform()`."""
    from platform import platform as describe_platform
    return describe_platform()
 | |
| 
 | |
| 
 | |
def get_libc_version():
    """Return the libc name and version joined by '-' on Linux, else 'N/A'."""
    import platform
    if get_platform() == 'linux':
        libc_name_and_version = platform.libc_ver()
        return '-'.join(libc_name_and_version)
    return 'N/A'
 | |
| 
 | |
| 
 | |
def get_pip_packages(run_lambda, patterns=None):
    """Return `(pip_version, packages)` for the running interpreter.

    `packages` holds the `pip list --format=freeze` lines that contain any
    of `patterns` (default DEFAULT_PIP_PATTERNS), joined by newlines. It is
    None when pip could not be run, so callers can tell "pip unavailable"
    apart from "no relevant packages" (an empty string).

    Note: will also find conda-installed pytorch and numpy packages.
    """
    if patterns is None:
        patterns = DEFAULT_PIP_PATTERNS

    pip_version = 'pip3' if sys.version[0] == '3' else 'pip'

    # People generally have `pip` as `pip` or `pip3`
    # But here it is invoked as `python -mpip`
    out = run_and_read_all(
        run_lambda, [sys.executable, '-mpip', "list", "--format=freeze"])
    if out is None:
        # pip exited non-zero; previously this crashed on out.splitlines().
        return pip_version, None

    relevant = "\n".join(line for line in out.splitlines()
                         if any(name in line for name in patterns))
    return pip_version, relevant
 | |
| 
 | |
| 
 | |
def get_cachingallocator_config():
    """Return the PYTORCH_CUDA_ALLOC_CONF environment variable, or '' if unset."""
    return os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
 | |
| 
 | |
| 
 | |
def get_cuda_module_loading_config():
    """Return the CUDA_MODULE_LOADING setting, or "N/A" without usable CUDA.

    When torch reports CUDA as available, CUDA is initialized first and the
    environment variable is read afterwards ('' if unset).
    """
    if not (TORCH_AVAILABLE and torch.cuda.is_available()):
        return "N/A"
    torch.cuda.init()
    return os.environ.get('CUDA_MODULE_LOADING', '')
 | |
| 
 | |
| 
 | |
def is_xnnpack_available():
    """Return `str(torch.backends.xnnpack.enabled)`, or "N/A" when torch is missing."""
    if not TORCH_AVAILABLE:
        return "N/A"
    import torch.backends.xnnpack
    xnnpack_enabled = torch.backends.xnnpack.enabled  # type: ignore[attr-defined]
    return str(xnnpack_enabled)
 | |
| 
 | |
def get_env_vars():
    """Return the relevant environment variables as 'KEY=VALUE' lines.

    A variable is reported when its name is a key of `environment_variables`
    or starts with one of the reported prefixes. Names containing a
    secret-looking term (case-insensitive) are always skipped. Every reported
    line ends with a newline; the result is '' when nothing qualifies.
    """
    secret_terms = ('secret', 'token', 'api', 'access', 'password')
    report_prefix = ("TORCH", "NCCL", "PYTORCH",
                     "CUDA", "CUBLAS", "CUDNN",
                     "OMP_", "MKL_",
                     "NVIDIA")
    reported = []
    for k, v in os.environ.items():
        if any(term in k.lower() for term in secret_terms):
            continue
        # A single combined test: a name satisfying both criteria used to be
        # emitted twice.
        if k in environment_variables or k.startswith(report_prefix):
            reported.append("{}={}\n".format(k, v))

    return ''.join(reported)
 | |
| 
 | |
def get_env_info():
    """Collect all environment details and return them as a SystemEnv.

    Torch-derived fields are 'N/A' when torch could not be imported. On a
    HIP build the CUDA compiled version is 'N/A' and the HIP/MIOpen runtime
    versions are parsed from torch's build configuration text.
    """
    run_lambda = run
    pip_version, pip_list_output = get_pip_packages(run_lambda)

    not_available = 'N/A'
    hip_compiled_version = not_available
    hip_runtime_version = not_available
    miopen_runtime_version = not_available

    if not TORCH_AVAILABLE:
        version_str = not_available
        debug_mode_str = not_available
        cuda_available_str = not_available
        cuda_version_str = not_available
    else:
        version_str = torch.__version__
        debug_mode_str = str(torch.version.debug)
        cuda_available_str = str(torch.cuda.is_available())
        cuda_version_str = torch.version.cuda
        if getattr(torch.version, 'hip', None) is not None:  # HIP version

            def get_version_or_na(cfg, prefix):
                # Last whitespace-separated token of the first matching line.
                for entry in cfg:
                    if prefix in entry:
                        return entry.rsplit(None, 1)[-1]
                return 'N/A'

            cfg = torch._C._show_config().split('\n')
            hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime')
            miopen_runtime_version = get_version_or_na(cfg, 'MIOpen')
            cuda_version_str = not_available
            hip_compiled_version = torch.version.hip

    sys_version = sys.version.replace("\n", " ")
    # sys.maxsize is 2**(bits - 1) - 1, hence the + 1.
    python_version = '{} ({}-bit runtime)'.format(
        sys_version, sys.maxsize.bit_length() + 1)

    conda_packages = get_conda_packages(run_lambda)

    rocm_version = get_rocm_version(run_lambda)
    neuron_sdk_version = get_neuron_sdk_version(run_lambda)
    vllm_version = get_vllm_version()
    vllm_build_flags = summarize_vllm_build_flags()
    gpu_topo = get_gpu_topo(run_lambda)

    return SystemEnv(
        torch_version=version_str,
        is_debug_build=debug_mode_str,
        python_version=python_version,
        python_platform=get_python_platform(),
        is_cuda_available=cuda_available_str,
        cuda_compiled_version=cuda_version_str,
        cuda_runtime_version=get_running_cuda_version(run_lambda),
        cuda_module_loading=get_cuda_module_loading_config(),
        nvidia_gpu_models=get_gpu_info(run_lambda),
        nvidia_driver_version=get_nvidia_driver_version(run_lambda),
        cudnn_version=get_cudnn_version(run_lambda),
        hip_compiled_version=hip_compiled_version,
        hip_runtime_version=hip_runtime_version,
        miopen_runtime_version=miopen_runtime_version,
        pip_version=pip_version,
        pip_packages=pip_list_output,
        conda_packages=conda_packages,
        os=get_os(run_lambda),
        libc_version=get_libc_version(),
        gcc_version=get_gcc_version(run_lambda),
        clang_version=get_clang_version(run_lambda),
        cmake_version=get_cmake_version(run_lambda),
        caching_allocator_config=get_cachingallocator_config(),
        is_xnnpack_available=is_xnnpack_available(),
        cpu_info=get_cpu_info(run_lambda),
        rocm_version=rocm_version,
        neuron_sdk_version=neuron_sdk_version,
        vllm_version=vllm_version,
        vllm_build_flags=vllm_build_flags,
        gpu_topo=gpu_topo,
        env_vars=get_env_vars(),
    )
 | |
| 
 | |
| 
 | |
# Report template filled in by `pretty_str`. The first section mirrors the
# PyTorch report and the second holds the vLLM specific fields. Each section
# is stripped of surrounding whitespace, so the two are joined with a single
# newline to keep them on separate lines.
env_info_fmt = "\n".join(section.strip() for section in (
    """
PyTorch version: {torch_version}
Is debug build: {is_debug_build}
CUDA used to build PyTorch: {cuda_compiled_version}
ROCM used to build PyTorch: {hip_compiled_version}

OS: {os}
GCC version: {gcc_version}
Clang version: {clang_version}
CMake version: {cmake_version}
Libc version: {libc_version}

Python version: {python_version}
Python platform: {python_platform}
Is CUDA available: {is_cuda_available}
CUDA runtime version: {cuda_runtime_version}
CUDA_MODULE_LOADING set to: {cuda_module_loading}
GPU models and configuration: {nvidia_gpu_models}
Nvidia driver version: {nvidia_driver_version}
cuDNN version: {cudnn_version}
HIP runtime version: {hip_runtime_version}
MIOpen runtime version: {miopen_runtime_version}
Is XNNPACK available: {is_xnnpack_available}

CPU:
{cpu_info}

Versions of relevant libraries:
{pip_packages}
{conda_packages}
""",
    """
ROCM Version: {rocm_version}
Neuron SDK Version: {neuron_sdk_version}
vLLM Version: {vllm_version}
vLLM Build Flags:
{vllm_build_flags}
GPU Topology:
{gpu_topo}

{env_vars}
""",
))
 | |
| 
 | |
| 
 | |
def pretty_str(envinfo):
    """Render a SystemEnv as the text report described by `env_info_fmt`.

    True/False become 'Yes'/'No', None becomes 'Could not collect', empty
    package listings become 'No relevant packages', and every package line
    is tagged with its source ('[pip3] ', '[conda] ', ...).
    """

    def tag_lines(text, tag):
        # Prefix every line of `text` with `tag`.
        return '\n'.join(tag + line for line in text.split('\n'))

    fields = envinfo._asdict()

    # If nvidia_gpu_models is multiline, start on the next line
    gpu_models = envinfo.nvidia_gpu_models
    if gpu_models is not None and len(gpu_models.split('\n')) > 1:
        gpu_models = '\n{}\n'.format(gpu_models)
    fields['nvidia_gpu_models'] = gpu_models

    # If the machine doesn't have CUDA, report some fields as 'No CUDA'
    dynamic_cuda_fields = [
        'cuda_runtime_version',
        'nvidia_gpu_models',
        'nvidia_driver_version',
    ]
    nothing_detected = all(fields[name] is None
                           for name in dynamic_cuda_fields)
    if (TORCH_AVAILABLE and not torch.cuda.is_available()
            and nothing_detected):
        for name in dynamic_cuda_fields + ['cudnn_version']:
            fields[name] = 'No CUDA'
        if envinfo.cuda_compiled_version is None:
            fields['cuda_compiled_version'] = 'None'

    # Replace True with Yes, False with No, and None with a placeholder.
    # Only values of existing keys are rewritten, so iterating is safe.
    for name, value in fields.items():
        if value is True:
            fields[name] = 'Yes'
        elif value is False:
            fields[name] = 'No'
        elif value is None:
            fields[name] = 'Could not collect'

    # If either of these are '', replace with 'No relevant packages'
    for name in ('pip_packages', 'conda_packages'):
        listing = fields[name]
        if listing is not None and len(listing) == 0:
            fields[name] = 'No relevant packages'

    # Tag conda and pip packages with a prefix
    # If they were previously None, they'll show up as ie '[conda] Could not collect'
    if fields['pip_packages']:
        pip_tag = '[{}] '.format(envinfo.pip_version)
        fields['pip_packages'] = tag_lines(fields['pip_packages'], pip_tag)
    if fields['conda_packages']:
        fields['conda_packages'] = tag_lines(fields['conda_packages'],
                                             '[conda] ')

    # The CPU section always shows the collected value as-is.
    fields['cpu_info'] = envinfo.cpu_info
    return env_info_fmt.format(**fields)
 | |
| 
 | |
| 
 | |
def get_pretty_env_info():
    """Collect the environment information and return it as a formatted report."""
    collected = get_env_info()
    return pretty_str(collected)
 | |
| 
 | |
| 
 | |
def main():
    """Print the environment report, then point out the newest minidump if any.

    The minidump hint is only attempted on Linux, when torch exposes
    `torch.utils._crash_handler` and its default dump directory exists. The
    hint goes to stderr so it stays out of the report itself.
    """
    print("Collecting environment information...")
    output = get_pretty_env_info()
    print(output)

    if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(
            torch.utils, '_crash_handler'):
        minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR
        if sys.platform == "linux" and os.path.exists(minidump_dir):
            dumps = [
                os.path.join(minidump_dir, dump)
                for dump in os.listdir(minidump_dir)
            ]
            # An existing but empty directory used to make max() raise
            # ValueError; there is simply nothing to report in that case.
            if dumps:
                latest = max(dumps, key=os.path.getctime)
                ctime = os.path.getctime(latest)
                creation_time = datetime.datetime.fromtimestamp(
                    ctime).strftime('%Y-%m-%d %H:%M:%S')
                msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \
                      "if this is related to your bug please include it when you file a report ***"
                print(msg, file=sys.stderr)


if __name__ == '__main__':
    main()
 |