mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
Co-authored-by: Jeff Rasley <jerasley@microsoft.com> Co-authored-by: Michael Wyatt <michaelwyatt@microsoft.com> Co-authored-by: Ammar Ahmad Awan <ammar.awan@microsoft.com> Co-authored-by: Masahiro Tanaka <mtanaka@microsoft.com> Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
27 lines
956 B
Plaintext
27 lines
956 B
Plaintext
// Copyright (c) Microsoft Corporation.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
// DeepSpeed Team
|
|
|
|
#pragma once
|
|
|
|
#include "ds_kernel_utils.h"
|
|
#include "ragged_dtypes.h"
|
|
|
|
#ifdef BF16_AVAILABLE
|
|
#include <cuda_bf16.h>
|
|
#endif
|
|
|
|
template <typename TokenType, typename EmbedType>
|
|
void launch_ragged_embed_kernel(EmbedType* embedded_tokens,
|
|
const TokenType* input_ids,
|
|
const EmbedType* embedding_weight,
|
|
const EmbedType* position_weight,
|
|
const BatchWrapperCPP batch_desc,
|
|
const int32_t n_tokens,
|
|
const int32_t embed_dim,
|
|
const int32_t vocab_size,
|
|
const int32_t max_position_embed_idx,
|
|
const int32_t position_embed_offset,
|
|
cudaStream_t stream);
|