From 0fc3daade7b982884c60a6ac08c7a71f19cd0a5d Mon Sep 17 00:00:00 2001
From: Lev Kurilenko <113481193+lekurile@users.noreply.github.com>
Date: Thu, 9 Jan 2025 12:11:35 -0800
Subject: [PATCH] Add position_ids arg to OPTEmbedding forward function
 (#6939)

This PR updates the DeepSpeed `OPTEmbedding` forward function to include
a new `position_ids` argument.

---------

Co-authored-by: Logan Adams
---
 .github/workflows/nv-ds-chat.yml  | 2 +-
 deepspeed/module_inject/layers.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/nv-ds-chat.yml b/.github/workflows/nv-ds-chat.yml
index 329a1060f..7e209cbe4 100644
--- a/.github/workflows/nv-ds-chat.yml
+++ b/.github/workflows/nv-ds-chat.yml
@@ -43,7 +43,7 @@ jobs:
       - name: Install deepspeed
         run: |
-          pip install transformers==4.45.2
+          pip install transformers
           pip install .[dev]
           ds_report

diff --git a/deepspeed/module_inject/layers.py b/deepspeed/module_inject/layers.py
index 22d24820d..722ba413a 100644
--- a/deepspeed/module_inject/layers.py
+++ b/deepspeed/module_inject/layers.py
@@ -191,7 +191,7 @@ class OPTEmbedding(EmbeddingLayer):
         self.offset = 2
         super().__init__(weight_shape, weight=weight)

-    def forward(self, attention_mask: torch.LongTensor, past_key_values_length: int = 0):
+    def forward(self, attention_mask: torch.LongTensor, past_key_values_length: int = 0, position_ids: int = 0):
         """`input_ids_shape` is expected to be [bsz x seqlen]."""
         attention_mask = attention_mask.long()
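Below is a minimal sketch of how the patched signature can be exercised. This is not the DeepSpeed source: `OPTEmbeddingSketch` is a hypothetical stand-in built on `torch.nn.Embedding` rather than DeepSpeed's `EmbeddingLayer`, and the mask-to-positions fallback follows the Hugging Face `OPTLearnedPositionalEmbedding` pattern, which is an assumption about the surrounding logic here. Note also that the diff annotates `position_ids` as `int = 0`; the sketch uses an optional tensor instead, since callers pass a `[bsz x seqlen]` tensor of position indices.

```python
# Hypothetical sketch, not the DeepSpeed implementation.
from typing import Optional

import torch
import torch.nn as nn


class OPTEmbeddingSketch(nn.Embedding):
    """OPT-style learned positional embedding with the patched forward signature."""

    def __init__(self, num_embeddings: int, embedding_dim: int):
        # OPT reserves the first two position slots, hence the offset of 2.
        self.offset = 2
        super().__init__(num_embeddings + self.offset, embedding_dim)

    def forward(self,
                attention_mask: torch.LongTensor,
                past_key_values_length: int = 0,
                position_ids: Optional[torch.LongTensor] = None):
        attention_mask = attention_mask.long()
        if position_ids is None:
            # Assumed fallback, mirroring HF's OPTLearnedPositionalEmbedding:
            # derive positions from the mask so padding tokens all map to -1.
            position_ids = torch.cumsum(attention_mask, dim=1).type_as(attention_mask) * attention_mask - 1
            # Keep only the positions for tokens produced after the KV cache.
            position_ids = position_ids[:, past_key_values_length:]
        return super().forward(position_ids + self.offset)


# Usage: positions derived from the mask, or supplied explicitly.
emb = OPTEmbeddingSketch(num_embeddings=2048, embedding_dim=768)
mask = torch.ones(1, 8, dtype=torch.long)
out_derived = emb(mask)
out_explicit = emb(mask, position_ids=torch.arange(8).unsqueeze(0))
```

Passing `position_ids` explicitly lets a caller bypass the mask-derived positions entirely, which is the point of threading the new argument through `forward`.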