#!/usr/bin/env python3 import argparse import os import sys from pathlib import Path # NOTE: `tools/amd_build/build_amd.py` could be a symlink. # The behavior of `symlink / '..'` is different from `symlink.parent`. # Use `pardir` three times rather than using `path.parents[2]`. REPO_ROOT = ( Path(__file__).absolute() / os.path.pardir / os.path.pardir / os.path.pardir ).resolve() sys.path.append(str(REPO_ROOT / "torch" / "utils")) from hipify import hipify_python # type: ignore[import] parser = argparse.ArgumentParser( description="Top-level script for HIPifying, filling in most common parameters" ) parser.add_argument( "--out-of-place-only", action="store_true", help="Whether to only run hipify out-of-place on source files", ) parser.add_argument( "--project-directory", type=str, default="", help="The root of the project.", required=False, ) parser.add_argument( "--output-directory", type=str, default="", help="The directory to store the hipified project", required=False, ) parser.add_argument( "--extra-include-dir", type=str, default=[], nargs="+", help="The list of extra directories in caffe2 to hipify", required=False, ) args = parser.parse_args() # NOTE: `tools/amd_build/build_amd.py` could be a symlink. amd_build_dir = os.path.dirname(os.path.realpath(__file__)) proj_dir = os.path.dirname(os.path.dirname(amd_build_dir)) if args.project_directory: proj_dir = args.project_directory out_dir = proj_dir if args.output_directory: out_dir = args.output_directory includes = [ "caffe2/operators/*", "caffe2/sgd/*", "caffe2/image/*", "caffe2/transforms/*", "caffe2/video/*", "caffe2/distributed/*", "caffe2/queue/*", "caffe2/contrib/aten/*", "binaries/*", "caffe2/**/*_test*", "caffe2/core/*", "caffe2/db/*", "caffe2/utils/*", "caffe2/contrib/gloo/*", "caffe2/contrib/nccl/*", "c10/cuda/*", "c10/cuda/test/CMakeLists.txt", "modules/*", "third_party/nvfuser/*", # PyTorch paths # Keep this synchronized with is_pytorch_file in hipify_python.py "aten/src/ATen/cuda/*", "aten/src/ATen/native/cuda/*", "aten/src/ATen/native/cudnn/*", "aten/src/ATen/native/quantized/cudnn/*", "aten/src/ATen/native/nested/cuda/*", "aten/src/ATen/native/sparse/cuda/*", "aten/src/ATen/native/quantized/cuda/*", "aten/src/ATen/native/transformers/cuda/attention_backward.cu", "aten/src/ATen/native/transformers/cuda/attention.cu", "aten/src/ATen/native/transformers/cuda/sdp_utils.cpp", "aten/src/ATen/native/transformers/cuda/sdp_utils.h", "aten/src/ATen/native/transformers/cuda/mem_eff_attention/debug_utils.h", "aten/src/ATen/native/transformers/cuda/mem_eff_attention/gemm_kernel_utils.h", "aten/src/ATen/native/transformers/cuda/mem_eff_attention/pytorch_utils.h", "aten/src/THC/*", "aten/src/ATen/test/*", # CMakeLists.txt isn't processed by default, but there are a few # we do want to handle, so explicitly specify them "aten/src/THC/CMakeLists.txt", "torch/*", "tools/autograd/templates/python_variable_methods.cpp", "torch/csrc/stable/*", ] includes = [os.path.join(proj_dir, include) for include in includes] for new_dir in args.extra_include_dir: abs_new_dir = os.path.join(proj_dir, new_dir) if os.path.exists(abs_new_dir): abs_new_dir = os.path.join(abs_new_dir, "**/*") includes.append(abs_new_dir) ignores = [ "caffe2/operators/depthwise_3x3_conv_op_cudnn.cu", "caffe2/operators/pool_op_cudnn.cu", "*/hip/*", # These files are compatible with both cuda and hip "aten/src/ATen/core/*", # Correct path to generate HIPConfig.h: # CUDAConfig.h.in -> (amd_build) HIPConfig.h.in -> (cmake) HIPConfig.h "aten/src/ATen/cuda/CUDAConfig.h", "third_party/nvfuser/csrc/codegen.cpp", "third_party/nvfuser/runtime/block_reduction.cu", "third_party/nvfuser/runtime/block_sync_atomic.cu", "third_party/nvfuser/runtime/block_sync_default_rocm.cu", "third_party/nvfuser/runtime/broadcast.cu", "third_party/nvfuser/runtime/grid_reduction.cu", "third_party/nvfuser/runtime/helpers.cu", "torch/csrc/jit/codegen/fuser/cuda/resource_strings.h", "torch/csrc/jit/tensorexpr/ir_printer.cpp", "torch/csrc/jit/ir/ir.h", # generated files we shouldn't frob "torch/lib/tmp_install/*", "torch/include/*", ] ignores = [os.path.join(proj_dir, ignore) for ignore in ignores] # Check if the compiler is hip-clang. # # This used to be a useful function but now we can safely always assume hip-clang. # Leaving the function here avoids bc-linter errors. def is_hip_clang() -> bool: return True # TODO Remove once the following submodules are updated hip_platform_files = [ "third_party/fbgemm/fbgemm_gpu/CMakeLists.txt", "third_party/fbgemm/fbgemm_gpu/cmake/Hip.cmake", "third_party/fbgemm/fbgemm_gpu/codegen/embedding_backward_dense_host.cpp", "third_party/fbgemm/fbgemm_gpu/codegen/embedding_backward_split_host_template.cpp", "third_party/fbgemm/fbgemm_gpu/codegen/embedding_backward_split_template.cu", "third_party/fbgemm/fbgemm_gpu/codegen/embedding_forward_quantized_split_lookup.cu", "third_party/fbgemm/fbgemm_gpu/include/fbgemm_gpu/utils/cuda_prelude.cuh", "third_party/fbgemm/fbgemm_gpu/include/fbgemm_gpu/utils/stochastic_rounding.cuh", "third_party/fbgemm/fbgemm_gpu/include/fbgemm_gpu/utils/vec4.cuh", "third_party/fbgemm/fbgemm_gpu/include/fbgemm_gpu/utils/weight_row.cuh", "third_party/fbgemm/fbgemm_gpu/include/fbgemm_gpu/sparse_ops.cuh", "third_party/fbgemm/fbgemm_gpu/src/jagged_tensor_ops.cu", "third_party/fbgemm/fbgemm_gpu/src/quantize_ops.cu", "third_party/fbgemm/fbgemm_gpu/src/sparse_ops.cu", "third_party/fbgemm/fbgemm_gpu/src/split_embeddings_cache_cuda.cu", "third_party/fbgemm/fbgemm_gpu/src/topology_utils.cpp", "third_party/fbgemm/src/EmbeddingSpMDM.cc", "third_party/gloo/cmake/Dependencies.cmake", "third_party/gloo/gloo/cuda.cu", "third_party/kineto/libkineto/CMakeLists.txt", "third_party/nvfuser/CMakeLists.txt", "third_party/tensorpipe/cmake/Hip.cmake", ] def remove_hcc(line: str) -> str: line = line.replace("HIP_PLATFORM_HCC", "HIP_PLATFORM_AMD") line = line.replace("HIP_HCC_FLAGS", "HIP_CLANG_FLAGS") return line for hip_platform_file in hip_platform_files: do_write = False if os.path.exists(hip_platform_file): with open(hip_platform_file) as sources: lines = sources.readlines() newlines = [remove_hcc(line) for line in lines] if lines == newlines: print(f"{hip_platform_file} skipped") else: with open(hip_platform_file, "w") as sources: for line in newlines: sources.write(line) print(f"{hip_platform_file} updated") # NOTE: fbgemm sources needing hipify # fbgemm is its own project with its own build system. pytorch uses fbgemm as # a submodule to acquire some gpu source files but compiles only those sources # instead of using fbgemm's own build system. One of the source files refers # to a header file that is the result of running hipify, but fbgemm uses # slightly different hipify settings than pytorch. fbgemm normally hipifies # and renames tuning_cache.cuh to tuning_cache_hip.cuh, but pytorch's settings # for hipify puts it into its own 'hip' directory. After hipify runs below with # the added fbgemm file, we move it to its expected location. fbgemm_dir = "third_party/fbgemm/fbgemm_gpu/experimental/gen_ai/src/quantize/common/include/fbgemm_gpu/quantize" fbgemm_original = f"{fbgemm_dir}/tuning_cache.cuh" fbgemm_move_src = f"{fbgemm_dir}/hip/tuning_cache.cuh" fbgemm_move_dst = f"{fbgemm_dir}/tuning_cache_hip.cuh" hipify_python.hipify( project_directory=proj_dir, output_directory=out_dir, includes=includes, ignores=ignores, extra_files=[ "torch/_inductor/codegen/cuda/device_op_overrides.py", "torch/_inductor/codegen/cpp_wrapper_cpu.py", "torch/_inductor/codegen/cpp_wrapper_gpu.py", "torch/_inductor/codegen/wrapper.py", fbgemm_original, ], out_of_place_only=args.out_of_place_only, hip_clang_launch=is_hip_clang(), ) # only update the file if it changes or doesn't exist do_write = True src_lines = None with open(fbgemm_move_src) as src: src_lines = src.readlines() if os.path.exists(fbgemm_move_dst): dst_lines = None with open(fbgemm_move_dst) as dst: dst_lines = dst.readlines() if src_lines == dst_lines: print(f"{fbgemm_move_dst} skipped") do_write = False if do_write: with open(fbgemm_move_dst, "w") as dst: for line in src_lines: dst.write(line) print(f"{fbgemm_move_dst} updated")