Files
DeepSpeed/deepspeed/inference/v2/kernels/ragged_ops/embed/embed.cuh
Connor Holmes 38b41dffa1 DeepSpeed-FastGen (#4604)
Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
Co-authored-by: Michael Wyatt <michaelwyatt@microsoft.com>
Co-authored-by: Ammar Ahmad Awan <ammar.awan@microsoft.com>
Co-authored-by: Masahiro Tanaka <mtanaka@microsoft.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
2023-11-03 15:07:35 -07:00

27 lines
956 B
Plaintext

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0
// DeepSpeed Team
#pragma once
#include "ds_kernel_utils.h"
#include "ragged_dtypes.h"
#ifdef BF16_AVAILABLE
#include <cuda_bf16.h>
#endif
// Declaration of the host-callable launcher for the ragged embedding kernel.
// Only the prototype appears in this header; the definition is provided elsewhere.
//
// Template parameters:
//   TokenType - element type of `input_ids`.
//   EmbedType - element type shared by `embedded_tokens`, `embedding_weight`
//               and `position_weight`.
//
// Parameters:
//   embedded_tokens        - the only non-const pointer in the signature.
//   input_ids              - read-only pointer to TokenType values.
//   embedding_weight       - read-only pointer to EmbedType values.
//   position_weight        - read-only pointer to EmbedType values.
//   batch_desc             - BatchWrapperCPP passed by value (type is not declared in
//                            this header's visible text).
//   n_tokens, embed_dim, vocab_size, max_position_embed_idx, position_embed_offset
//                          - 32-bit signed integers passed by value.
//   stream                 - CUDA stream handle passed by value.
//
// NOTE(review): the following is inferred from parameter names only; confirm against
// the definition before relying on it:
//   - `embedded_tokens` is presumably the output buffer, sized n_tokens x embed_dim.
//   - `embedding_weight` is presumably the token embedding table (vocab_size rows) and
//     `position_weight` the positional embedding table.
//   - `max_position_embed_idx` / `position_embed_offset` presumably bound and shift the
//     index used to read `position_weight`.
//   - the kernel is presumably enqueued on `stream`; whether the call synchronizes is
//     not visible here.
//   - whether the pointers must be device pointers, and whether `position_weight` may be
//     null, is not visible here. TODO confirm.
template <typename TokenType, typename EmbedType>
void launch_ragged_embed_kernel(EmbedType* embedded_tokens,
const TokenType* input_ids,
const EmbedType* embedding_weight,
const EmbedType* position_weight,
const BatchWrapperCPP batch_desc,
const int32_t n_tokens,
const int32_t embed_dim,
const int32_t vocab_size,
const int32_t max_position_embed_idx,
const int32_t position_embed_offset,
cudaStream_t stream);