xe: enable source debug information for OpenCL C kernels

2025-10-20 18:43:49 +08:00 · 2025-01-03 14:07:34 -08:00
parent 14015f03d5
commit d4b61e983b
10 changed files with 144 additions and 47 deletions
--- a/cmake/gen_gpu_kernel.cmake
+++ b/cmake/gen_gpu_kernel.cmake
@ -1,5 +1,5 @@
 #===============================================================================
-# Copyright 2019-2024 Intel Corporation
+# Copyright 2019-2025 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -22,14 +22,17 @@

 file(READ ${CL_FILE} cl_file_lines)

-# Remove C++ style comments
-string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}")
-# Remove repeated whitespaces
-string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}")
-# Remove leading whitespaces
-string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}")
-# Remove empty lines
-string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}")
+string(LENGTH "${cl_file_lines}" len)
+if(MINIFY OR len GREATER 65535)
+    # Remove C++ style comments
+    string(REGEX REPLACE "//[^\n]*\n" "\n" cl_file_lines "${cl_file_lines}")
+    # Remove repeated whitespaces
+    string(REGEX REPLACE " +" " " cl_file_lines "${cl_file_lines}")
+    # Remove leading whitespaces
+    string(REGEX REPLACE "\n " "\n" cl_file_lines "${cl_file_lines}")
+    # Remove empty lines
+    string(REGEX REPLACE "\n+" "\n" cl_file_lines "${cl_file_lines}")
+endif()

 string(LENGTH "${cl_file_lines}" len)
 if(len GREATER 65535)
--- a/cmake/gen_gpu_kernel_list.cmake
+++ b/cmake/gen_gpu_kernel_list.cmake
@ -1,5 +1,5 @@
 #===============================================================================
-# Copyright 2020-2021 Intel Corporation
+# Copyright 2020-2025 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -46,6 +46,11 @@ endfunction()
 function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
    set(_sources "${SOURCES}")

+    set(MINIFY "ON")
+    if(DNNL_DEV_MODE OR CMAKE_BUILD_TYPE STREQUAL "Debug")
+        set(MINIFY "OFF")
+    endif()
+
    set(KER_LIST_EXTERN)
    set(KER_LIST_ENTRIES)
    set(KER_HEADERS_EXTERN)
@ -62,6 +67,7 @@ function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
            COMMAND ${CMAKE_COMMAND}
                -DCL_FILE="${header_path}"
                -DGEN_FILE="${gen_file}"
+                -DMINIFY="${MINIFY}"
                -P ${PROJECT_SOURCE_DIR}/cmake/gen_gpu_kernel.cmake
            DEPENDS ${header_path}
        )
@ -81,6 +87,7 @@ function(gen_gpu_kernel_list ker_list_templ ker_list_src ker_sources headers)
            COMMAND ${CMAKE_COMMAND}
                -DCL_FILE="${ker_path}"
                -DGEN_FILE="${gen_file}"
+                -DMINIFY="${MINIFY}"
                -P ${PROJECT_SOURCE_DIR}/cmake/gen_gpu_kernel.cmake
            DEPENDS ${ker_path}
        )
--- a/src/gpu/intel/compute/compute_engine.hpp
+++ b/src/gpu/intel/compute/compute_engine.hpp
@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -72,7 +72,8 @@ public:
    }

    virtual status_t create_kernel_from_binary(compute::kernel_t &kernel,
-            const xpu::binary_t &binary, const char *kernel_name) const = 0;
+            const xpu::binary_t &binary, const char *kernel_name,
+            const program_src_t &src) const = 0;

    virtual status_t create_kernels_from_cache_blob(
            const cache_blob_t &cache_blob,
--- a/src/gpu/intel/compute/kernel.hpp
+++ b/src/gpu/intel/compute/kernel.hpp
@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -17,10 +17,15 @@
 #ifndef GPU_INTEL_COMPUTE_KERNEL_HPP
 #define GPU_INTEL_COMPUTE_KERNEL_HPP

+#if defined(__linux__) && (defined(DNNL_DEV_MODE) || !defined(NDEBUG))
+#include <unistd.h>
+#endif
+
 #include <functional>
 #include <memory>
 #include <utility>

+#include "common/utils.hpp"
 #include "common/verbose.hpp"
 #include "gpu/intel/compute/kernel_arg_list.hpp"
 #include "gpu/intel/compute/utils.hpp"
@ -34,6 +39,66 @@ namespace gpu {
 namespace intel {
 namespace compute {

+#if defined(__linux__) && (defined(DNNL_DEV_MODE) || !defined(NDEBUG))
+struct program_src_t {
+    program_src_t() = default;
+    program_src_t(const std::string &src_str) {
+        // Only enable if gdb-oneapi debugging is active
+        if (getenv_int("ZET_ENABLE_PROGRAM_DEBUGGING", 0) == 0) return;
+
+        const int name_size = 29;
+        char name[name_size] = "/tmp/dnnl_ocl_jit_src.XXXXXX";
+
+        // Ensure /tmp is a valid target for writing a temporary file
+        bool is_symlink = false;
+        status_t status = check_for_symlinks("/tmp", &is_symlink);
+        if (status != status::success || is_symlink) return;
+
+        // Guaranteed to have permissions 600 per the mkstemp specification,
+        // which is the minimum required for writing and then subsequently
+        // reading when debugging.
+        int fd = mkstemp(name);
+        if (fd == -1) return;
+
+        auto delete_fd = [&](int fd, char *name) {
+            // Unlink is called before close to ensure the file always exists
+            // and cannot be replaced with another file
+            unlink(name);
+            close(fd);
+        };
+
+        if (write(fd, src_str.c_str(), src_str.length()) == -1) {
+            delete_fd(fd, name);
+            return;
+        }
+        if (fsync(fd) == -1) {
+            delete_fd(fd, name);
+            return;
+        }
+
+        auto deleter = [&](char *name) {
+            delete_fd(fd, name);
+            delete[] name;
+        };
+
+        name_ = std::shared_ptr<char>(new char[name_size], deleter);
+        std::memcpy(name_.get(), name, name_size);
+    }
+    operator bool() const { return name_ != nullptr; };
+    const char *name() const { return name_.get(); }
+
+private:
+    std::shared_ptr<char> name_;
+};
+#else
+struct program_src_t {
+    program_src_t() = default;
+    program_src_t(const std::string &src_str) {}
+    operator bool() const { return false; }
+    const char *name() const { return nullptr; }
+};
+#endif
+
 class kernel_impl_t {
 public:
    kernel_impl_t() = default;
--- a/src/gpu/intel/ocl/ocl_gpu_engine.cpp
+++ b/src/gpu/intel/ocl/ocl_gpu_engine.cpp
@ -142,8 +142,8 @@ status_t create_ocl_kernel_from_cache_blob(const ocl_gpu_engine_t *ocl_engine,
        OCL_CHECK(err);

        std::shared_ptr<compute::kernel_impl_t> kernel_impl
-                = std::make_shared<ocl_gpu_kernel_t>(
-                        std::move(ocl_kernel), arg_types);
+                = std::make_shared<ocl_gpu_kernel_t>(std::move(ocl_kernel),
+                        arg_types, compute::program_src_t());
        (*kernels)[i] = std::move(kernel_impl);
    }

@ -231,7 +231,8 @@ inline status_t fuse_microkernels(cl_context context, cl_device_id device,
 } // namespace

 status_t ocl_gpu_engine_t::build_program_from_source(
-        xpu::ocl::wrapper_t<cl_program> &program, const char *code_string,
+        xpu::ocl::wrapper_t<cl_program> &program, compute::program_src_t &src,
+        const char *code_string,
        const compute::kernel_ctx_t &kernel_ctx) const {
    std::string options = kernel_ctx.options();

@ -251,6 +252,9 @@ status_t ocl_gpu_engine_t::build_program_from_source(
    std::string pp_code_str = pp_code.str();
    const char *pp_code_str_ptr = pp_code_str.c_str();

+    src = {pp_code_str};
+    if (src) { options += " -g -s " + std::string(src.name()); }
+
    debugdump_processed_source(
            pp_code_str, options, dev_info->get_cl_ext_options());

@ -270,7 +274,8 @@ status_t ocl_gpu_engine_t::build_program_from_source(
 }

 status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel,
-        const xpu::binary_t &binary, const char *kernel_name) const {
+        const xpu::binary_t &binary, const char *kernel_name,
+        const compute::program_src_t &src) const {
    xpu::ocl::wrapper_t<cl_program> program;
    CHECK(xpu::ocl::create_program(
            program, this->device(), this->context(), binary));
@ -285,7 +290,7 @@ status_t ocl_gpu_engine_t::create_kernel_from_binary(compute::kernel_t &kernel,

    std::shared_ptr<compute::kernel_impl_t> kernel_impl
            = std::make_shared<ocl_gpu_kernel_t>(
-                    std::move(ocl_kernel), arg_types);
+                    std::move(ocl_kernel), arg_types, src);
    kernel = std::move(kernel_impl);

    return status::success;
@ -303,14 +308,14 @@ status_t ocl_gpu_engine_t::create_kernel(
    if (!jitter) return status::invalid_arguments;
    xpu::binary_t binary = jitter->get_binary(this);
    if (binary.empty()) return status::runtime_error;
-    VCHECK_KERNEL(
-            create_kernel_from_binary(*kernel, binary, jitter->kernel_name()),
+    VCHECK_KERNEL(create_kernel_from_binary(
+                          *kernel, binary, jitter->kernel_name(), {}),
            VERBOSE_KERNEL_CREATION_FAIL, jitter->kernel_name());
    return status::success;
 }

 status_t ocl_gpu_engine_t::create_program(
-        xpu::ocl::wrapper_t<cl_program> &program,
+        xpu::ocl::wrapper_t<cl_program> &program, compute::program_src_t &src,
        const std::vector<const char *> &kernel_names,
        const compute::kernel_ctx_t &kernel_ctx) const {

@ -344,7 +349,7 @@ status_t ocl_gpu_engine_t::create_program(
            "kernels in a single .cl source file or split creation in groups "
            "based on their .cl source file.";

-    return build_program_from_source(program, source, kernel_ctx);
+    return build_program_from_source(program, src, source, kernel_ctx);
 }

 status_t ocl_gpu_engine_t::create_kernels(
@ -356,13 +361,15 @@ status_t ocl_gpu_engine_t::create_kernels(
    *kernels = std::vector<compute::kernel_t>(kernel_names.size());

    xpu::ocl::wrapper_t<cl_program> program;
-    CHECK(create_program(program, kernel_names, kernel_ctx));
-    return create_kernels_from_program(kernels, kernel_names, program);
+    compute::program_src_t src;
+    CHECK(create_program(program, src, kernel_names, kernel_ctx));
+    return create_kernels_from_program(kernels, kernel_names, program, src);
 }

 status_t ocl_gpu_engine_t::create_kernels_from_program(
        std::vector<compute::kernel_t> *kernels,
-        const std::vector<const char *> &kernel_names, cl_program program) {
+        const std::vector<const char *> &kernel_names, cl_program program,
+        const compute::program_src_t &src) {
    *kernels = std::vector<compute::kernel_t>(kernel_names.size());
    for (size_t i = 0; i < kernel_names.size(); ++i) {
        if (!kernel_names[i]) continue;
@ -375,7 +382,7 @@ status_t ocl_gpu_engine_t::create_kernels_from_program(

        std::shared_ptr<compute::kernel_impl_t> kernel_impl
                = std::make_shared<ocl_gpu_kernel_t>(
-                        std::move(ocl_kernel), arg_types);
+                        std::move(ocl_kernel), arg_types, src);
        (*kernels)[i] = std::move(kernel_impl);
    }

--- a/src/gpu/intel/ocl/ocl_gpu_engine.hpp
+++ b/src/gpu/intel/ocl/ocl_gpu_engine.hpp
@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -48,8 +48,8 @@ public:
            impl::stream_t **stream, impl::stream_impl_t *stream_impl) override;

    status_t create_kernel_from_binary(compute::kernel_t &kernel,
-            const xpu::binary_t &binary,
-            const char *kernel_name) const override;
+            const xpu::binary_t &binary, const char *kernel_name,
+            const compute::program_src_t &src) const override;

    status_t create_kernels_from_cache_blob(const cache_blob_t &cache_blob,
            std::vector<compute::kernel_t> &kernels,
@ -64,7 +64,8 @@ public:

    static status_t create_kernels_from_program(
            std::vector<compute::kernel_t> *kernels,
-            const std::vector<const char *> &kernel_names, cl_program program);
+            const std::vector<const char *> &kernel_names, cl_program program,
+            const compute::program_src_t &src);

    const impl_list_item_t *get_concat_implementation_list() const override {
        return gpu_impl_list_t::get_concat_implementation_list();
@ -100,6 +101,7 @@ public:
    }

    status_t create_program(xpu::ocl::wrapper_t<cl_program> &program,
+            compute::program_src_t &src,
            const std::vector<const char *> &kernel_names,
            const compute::kernel_ctx_t &kernel_ctx) const;

@ -111,7 +113,7 @@ protected:
    }

    status_t build_program_from_source(xpu::ocl::wrapper_t<cl_program> &program,
-            const char *code_string,
+            compute::program_src_t &src, const char *code_string,
            const compute::kernel_ctx_t &kernel_ctx) const;

    ~ocl_gpu_engine_t() override = default;
--- a/src/gpu/intel/ocl/ocl_gpu_kernel.cpp
+++ b/src/gpu/intel/ocl/ocl_gpu_kernel.cpp
@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -111,9 +111,11 @@ private:
 };

 ocl_gpu_kernel_t::ocl_gpu_kernel_t(xpu::ocl::wrapper_t<cl_kernel> &&ocl_kernel,
-        const std::vector<gpu::intel::compute::scalar_type_t> &arg_types)
+        const std::vector<gpu::intel::compute::scalar_type_t> &arg_types,
+        compute::program_src_t src)
    : ocl_kernel_(std::move(ocl_kernel))
    , arg_types_(arg_types)
+    , src_(std::move(src))
    , save_events_(false) {
    cache_ = std::make_shared<ocl_gpu_kernel_cache_t>(ocl_kernel_);
 }
--- a/src/gpu/intel/ocl/ocl_gpu_kernel.hpp
+++ b/src/gpu/intel/ocl/ocl_gpu_kernel.hpp
@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -35,7 +35,8 @@ class ocl_gpu_kernel_cache_t;
 class ocl_gpu_kernel_t : public compute::kernel_impl_t {
 public:
    ocl_gpu_kernel_t(xpu::ocl::wrapper_t<cl_kernel> &&ocl_kernel,
-            const std::vector<gpu::intel::compute::scalar_type_t> &arg_types);
+            const std::vector<gpu::intel::compute::scalar_type_t> &arg_types,
+            compute::program_src_t src);
    ~ocl_gpu_kernel_t() override = default;

    cl_kernel ocl_kernel() const { return ocl_kernel_; }
@ -59,11 +60,13 @@ public:

    status_t dump() const override;
    std::string name() const override;
+    const compute::program_src_t &src() const { return src_; }

 private:
    xpu::ocl::wrapper_t<cl_kernel> ocl_kernel_;
    std::vector<gpu::intel::compute::scalar_type_t> arg_types_;
    std::shared_ptr<ocl_gpu_kernel_cache_t> cache_;
+    compute::program_src_t src_;
    bool save_events_;
 };

--- a/src/gpu/intel/sycl/engine.hpp
+++ b/src/gpu/intel/sycl/engine.hpp
@ -65,7 +65,9 @@ public:

    status_t convert_to_sycl(
            std::vector<gpu::intel::compute::kernel_t> &kernels,
-            cl_program program, const std::vector<const char *> &kernel_names,
+            cl_program program,
+            const gpu::intel::compute::program_src_t &program_src,
+            const std::vector<const char *> &kernel_names,
            gpu::intel::ocl::ocl_gpu_engine_t *ocl_engine) const {
        kernels = std::vector<gpu::intel::compute::kernel_t>(
                kernel_names.size());
@ -80,7 +82,7 @@ public:
        for (size_t i = 0; i < kernel_names.size(); i++) {
            std::shared_ptr<gpu::intel::compute::kernel_impl_t> kernel_impl
                    = std::make_shared<sycl_interop_gpu_kernel_t>(
-                            std::move(sycl_kernels[i]));
+                            std::move(sycl_kernels[i]), program_src);
            kernels[i] = std::move(kernel_impl);
        }
        return status::success;
@ -100,14 +102,14 @@ public:
            xpu::binary_t binary;
            CHECK(k->get_binary(ocl_engine, binary));
            CHECK(create_kernel_from_binary(
-                    kernels[i], binary, kernel_names[i]));
+                    kernels[i], binary, kernel_names[i], k->src()));
        }
        return status::success;
    }

    status_t create_kernel_from_binary(gpu::intel::compute::kernel_t &kernel,
-            const xpu::binary_t &binary,
-            const char *kernel_name) const override {
+            const xpu::binary_t &binary, const char *kernel_name,
+            const gpu::intel::compute::program_src_t &src) const override {
        std::unique_ptr<::sycl::kernel> sycl_kernel;
        VCHECK_KERNEL(gpu::intel::sycl::compat::make_kernel(
                              sycl_kernel, kernel_name, this, binary),
@ -115,7 +117,7 @@ public:

        std::shared_ptr<gpu::intel::compute::kernel_impl_t> kernel_impl
                = std::make_shared<sycl_interop_gpu_kernel_t>(
-                        std::move(sycl_kernel));
+                        std::move(sycl_kernel), src);
        kernel = std::move(kernel_impl);
        return status::success;
    }
@ -156,7 +158,8 @@ public:
        auto kernel_name = jitter->kernel_name();

        xpu::binary_t kernel_binary = jitter->get_binary(ocl_engine.get());
-        return create_kernel_from_binary(*kernel, kernel_binary, kernel_name);
+        return create_kernel_from_binary(
+                *kernel, kernel_binary, kernel_name, {});
    }

    status_t create_kernels(std::vector<gpu::intel::compute::kernel_t> *kernels,
@ -173,10 +176,11 @@ public:
        CHECK(gpu::intel::sycl::create_ocl_engine(&ocl_engine, this));

        xpu::ocl::wrapper_t<cl_program> ocl_program;
+        gpu::intel::compute::program_src_t src;
        CHECK(ocl_engine->create_program(
-                ocl_program, kernel_names, kernel_ctx));
+                ocl_program, src, kernel_names, kernel_ctx));
        CHECK(convert_to_sycl(
-                *kernels, ocl_program, kernel_names, ocl_engine.get()));
+                *kernels, ocl_program, src, kernel_names, ocl_engine.get()));
        return status::success;
    }

--- a/src/gpu/intel/sycl/sycl_interop_gpu_kernel.hpp
+++ b/src/gpu/intel/sycl/sycl_interop_gpu_kernel.hpp
@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2024 Intel Corporation
+* Copyright 2019-2025 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -29,8 +29,9 @@ namespace sycl {

 class sycl_interop_gpu_kernel_t : public gpu::intel::compute::kernel_impl_t {
 public:
-    sycl_interop_gpu_kernel_t(std::unique_ptr<::sycl::kernel> &&sycl_kernel)
-        : sycl_kernel_(std::move(sycl_kernel)) {}
+    sycl_interop_gpu_kernel_t(std::unique_ptr<::sycl::kernel> &&sycl_kernel,
+            gpu::intel::compute::program_src_t src)
+        : sycl_kernel_(std::move(sycl_kernel)), src_(src) {}

    ::sycl::kernel sycl_kernel() const { return *sycl_kernel_; }

@ -48,10 +49,12 @@ public:
    std::string name() const override {
        return sycl_kernel_->get_info<::sycl::info::kernel::function_name>();
    }
+    const compute::program_src_t &src() const { return src_; }

 private:
    std::unique_ptr<::sycl::kernel> sycl_kernel_;
    std::vector<gpu::intel::compute::scalar_type_t> arg_types_;
+    gpu::intel::compute::program_src_t src_;
 };

 } // namespace sycl