Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-21 01:23:56 +08:00)
Compare commits: v4.52.1 ... run-fix-Parameter-init
2 commits: 4700bee289, a2d8e7258b
.github/workflows/doctests.yml (2 changed lines)
```diff
@@ -3,7 +3,7 @@ name: Doctests
 on:
   push:
     branches:
-      - run_doctest*
+      - run-fix-Parameter-init
   repository_dispatch:
   schedule:
     - cron: "17 2 * * *"
```
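For reference, the entries under `on.push.branches` are glob filters: the workflow only runs when a pushed branch matches one of them, and a trailing `*` matches any suffix (so `run_doctest*` covers every branch with that prefix). A minimal, illustrative sketch of the same trigger pattern; the workflow name and branch names here are made up, only the filter syntax is taken as given:

```yaml
name: Example doc tests
on:
  push:
    branches:
      - run_doctest*        # glob: any branch whose name starts with "run_doctest"
      - my-debug-branch     # hypothetical exact branch name
jobs:
  smoke:
    runs-on: ubuntu-latest
    steps:
      - run: echo "push to a matching branch triggered this run"
```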
.github/workflows/self-scheduled-caller.yml (42 changed lines)
```diff
@@ -7,17 +7,9 @@ on:
     - cron: "17 2 * * *"
   push:
     branches:
-      - run_scheduled_ci*
+      - run-fix-Parameter-init
 
 jobs:
-  model-ci:
-    name: Model CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_tests_gpu
-      slack_report_channel: "#transformers-ci-daily-models"
-    secrets: inherit
-
   torch-pipeline:
     name: Torch pipeline CI
     uses: ./.github/workflows/self-scheduled.yml
@@ -25,35 +17,3 @@ jobs:
       job: run_pipelines_torch_gpu
       slack_report_channel: "#transformers-ci-daily-pipeline-torch"
     secrets: inherit
-
-  tf-pipeline:
-    name: TF pipeline CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_pipelines_tf_gpu
-      slack_report_channel: "#transformers-ci-daily-pipeline-tf"
-    secrets: inherit
-
-  example-ci:
-    name: Example CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_examples_gpu
-      slack_report_channel: "#transformers-ci-daily-examples"
-    secrets: inherit
-
-  deepspeed-ci:
-    name: DeepSpeed CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_all_tests_torch_cuda_extensions_gpu
-      slack_report_channel: "#transformers-ci-daily-deepspeed"
-    secrets: inherit
-
-  quantization-ci:
-    name: Quantization CI
-    uses: ./.github/workflows/self-scheduled.yml
-    with:
-      job: run_tests_quantization_torch_gpu
-      slack_report_channel: "#transformers-ci-daily-quantization"
-    secrets: inherit
```
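Each job in this caller file invokes the shared self-scheduled.yml as a reusable workflow: `uses:` points at the workflow file, `with:` passes the `job` input that selects a test suite, and `secrets: inherit` forwards the caller's secrets. Trimming the caller therefore limits which suites run without touching the shared workflow. A minimal caller/callee sketch of that mechanism, with made-up file, job, and input names (only the general `workflow_call` pattern is assumed, not the actual contents of self-scheduled.yml):

```yaml
# .github/workflows/caller.yml (illustrative)
name: Example caller
on:
  push:
jobs:
  my-suite:
    uses: ./.github/workflows/reusable.yml   # reusable workflow in the same repository
    with:
      job: run_my_tests                      # input consumed by the reusable workflow
    secrets: inherit                         # forward all of the caller's secrets

# .github/workflows/reusable.yml (illustrative)
name: Example reusable workflow
on:
  workflow_call:
    inputs:
      job:
        required: true
        type: string
jobs:
  run-suite:
    runs-on: ubuntu-latest
    steps:
      - run: echo "selected suite ${{ inputs.job }}"
```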
```diff
@@ -822,7 +822,7 @@ class Data2VecAudioModel(Data2VecAudioPreTrainedModel):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = Data2VecAudioEncoder(config)
 
@@ -858,7 +858,7 @@ class Data2VecAudioModel(Data2VecAudioPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -868,7 +868,7 @@ class Data2VecAudioModel(Data2VecAudioPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -974,7 +974,7 @@ class HubertModel(HubertPreTrainedModel):
         self.feature_projection = HubertFeatureProjection(config)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = HubertEncoderStableLayerNorm(config)
@@ -1005,7 +1005,7 @@ class HubertModel(HubertPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1015,7 +1015,7 @@ class HubertModel(HubertPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -834,7 +834,7 @@ class SEWModel(SEWPreTrainedModel):
         self.feature_dropout = nn.Dropout(config.feat_proj_dropout)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = SEWEncoder(config)
 
@@ -862,7 +862,7 @@ class SEWModel(SEWPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -872,7 +872,7 @@ class SEWModel(SEWPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1360,7 +1360,7 @@ class SEWDModel(SEWDPreTrainedModel):
         self.feature_dropout = nn.Dropout(config.feat_proj_dropout)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = SEWDEncoder(config)
 
@@ -1388,7 +1388,7 @@ class SEWDModel(SEWDPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1398,7 +1398,7 @@ class SEWDModel(SEWDPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
            )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -517,7 +517,7 @@ class SpeechT5SpeechEncoderPrenet(nn.Module):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.pos_conv_embed = SpeechT5PositionalConvEmbedding(config)
         self.pos_sinusoidal_embed = SpeechT5SinusoidalPositionalEmbedding(
@@ -616,7 +616,7 @@ class SpeechT5SpeechEncoderPrenet(nn.Module):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -626,7 +626,7 @@ class SpeechT5SpeechEncoderPrenet(nn.Module):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1090,7 +1090,7 @@ class UniSpeechModel(UniSpeechPreTrainedModel):
         self.feature_projection = UniSpeechFeatureProjection(config)
 
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = UniSpeechEncoderStableLayerNorm(config)
@@ -1121,7 +1121,7 @@ class UniSpeechModel(UniSpeechPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1131,7 +1131,7 @@ class UniSpeechModel(UniSpeechPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1108,7 +1108,7 @@ class UniSpeechSatModel(UniSpeechSatPreTrainedModel):
         self.feature_extractor = UniSpeechSatFeatureEncoder(config)
         self.feature_projection = UniSpeechSatFeatureProjection(config)
 
-        self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+        self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = UniSpeechSatEncoderStableLayerNorm(config)
@@ -1139,7 +1139,7 @@ class UniSpeechSatModel(UniSpeechSatPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1149,7 +1149,7 @@ class UniSpeechSatModel(UniSpeechSatPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1445,7 +1445,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = Wav2Vec2EncoderStableLayerNorm(config)
@@ -1496,7 +1496,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1506,7 +1506,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1053,7 +1053,7 @@ class Wav2Vec2BertModel(Wav2Vec2BertPreTrainedModel):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = Wav2Vec2BertEncoder(config)
 
@@ -1087,7 +1087,7 @@ class Wav2Vec2BertModel(Wav2Vec2BertPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1097,7 +1097,7 @@ class Wav2Vec2BertModel(Wav2Vec2BertPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1235,7 +1235,7 @@ class Wav2Vec2ConformerModel(Wav2Vec2ConformerPreTrainedModel):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         self.encoder = Wav2Vec2ConformerEncoder(config)
 
@@ -1273,7 +1273,7 @@ class Wav2Vec2ConformerModel(Wav2Vec2ConformerPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1283,7 +1283,7 @@ class Wav2Vec2ConformerModel(Wav2Vec2ConformerPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
```diff
@@ -1107,7 +1107,7 @@ class WavLMModel(WavLMPreTrainedModel):
 
         # model only needs masking vector if mask prob is > 0.0
         if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
-            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())
+            self.masked_spec_embed = nn.Parameter(torch.Tensor(config.hidden_size).uniform_())
 
         if config.do_stable_layer_norm:
             self.encoder = WavLMEncoderStableLayerNorm(config)
@@ -1158,7 +1158,7 @@ class WavLMModel(WavLMPreTrainedModel):
 
         if mask_time_indices is not None:
             # apply SpecAugment along time axis with given mask_time_indices
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
         elif self.config.mask_time_prob > 0 and self.training:
             mask_time_indices = _compute_mask_indices(
                 (batch_size, sequence_length),
@@ -1168,7 +1168,7 @@ class WavLMModel(WavLMPreTrainedModel):
                 min_masks=self.config.mask_time_min_masks,
             )
             mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
-            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
+            hidden_states[mask_time_indices] = self.masked_spec_embed
 
         if self.config.mask_feature_prob > 0 and self.training:
             # generate indices & apply SpecAugment along feature axis
```
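Every model hunk above makes the same two-line change: `masked_spec_embed` is built with `torch.Tensor(config.hidden_size)` instead of `torch.FloatTensor(config.hidden_size)`, and the `.to(hidden_states.dtype)` cast at the point where the embedding is written into `hidden_states` is dropped. A plausible reading, not stated on this page, is that `torch.FloatTensor` always allocates float32 while `torch.Tensor` follows the current default floating-point dtype, and boolean-mask assignment requires matching source and destination dtypes, so a parameter created under the model's default dtype no longer needs the explicit cast. A small standalone sketch of that dtype behaviour (plain PyTorch, not transformers code):

```python
import torch

# torch.FloatTensor is pinned to float32; torch.Tensor follows the default dtype.
torch.set_default_dtype(torch.float64)

pinned = torch.FloatTensor(4).uniform_()          # float32, filled in-place with U[0, 1)
follows_default = torch.Tensor(4).uniform_()      # float64 under the default set above

print(pinned.dtype, follows_default.dtype)        # torch.float32 torch.float64

# Boolean-mask assignment requires the source and destination dtypes to match,
# which is what the removed `.to(hidden_states.dtype)` cast was ensuring.
hidden_states = torch.zeros(10, 4)                # float64 here
mask = torch.zeros(10, dtype=torch.bool)
mask[::3] = True

hidden_states[mask] = pinned.to(hidden_states.dtype)   # works: explicit cast
hidden_states[mask] = follows_default                  # works: dtypes already match

torch.set_default_dtype(torch.float32)            # restore the usual default
```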