Compare commits

...

1 Commit

SHA1: b74650c5d5
Message: run doctest 2023-10-16 [PR #26627] torch 2.1.0 + not run idefics.md
Date: 2023-10-30 20:56:11 +01:00
4 changed files with 52 additions and 80 deletions

View File

@@ -3,7 +3,7 @@ name: Doctests
 on:
   push:
     branches:
-      - doctest*
+      - run_fc639143
   repository_dispatch:
   schedule:
     - cron: "17 2 * * *"
@@ -20,7 +20,7 @@ env:
 jobs:
   run_doctests:
-    runs-on: [single-gpu, nvidia-gpu, t4, doctest-ci]
+    runs-on: [single-gpu, nvidia-gpu, yih-dar-shieh-debug-doctest, doctest-ci]
     container:
       image: huggingface/transformers-all-latest-gpu
       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -40,6 +40,16 @@ jobs:
         run: |
           python3 utils/print_env.py
 
+      - name: Uninstall and reInstall torch
+        run: |
+          python3 -m pip install -U torch==2.1.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+          python3 -m pip uninstall -y accelerate && python3 -m pip install -U accelerate@git+https://github.com/huggingface/accelerate@c3ec7ff5a9e1e3bddd3be0061e6362ff1bcb08a1
+          python3 -m pip uninstall -y peft && python3 -m pip install -U peft@git+https://github.com/huggingface/peft@2464c572eba6b60a9d19ba1913fcec6bc0a2724b
+          python3 -m pip install -U alembic==1.12.0 dash==2.14.0 Flask==2.2.5 onnx==1.14.1 plac==1.4.0 plotly==5.17.0 Werkzeug==2.2.3
+          python3 -m pip uninstall -y blinker
+          # python3 -m pip install -U cmake==3.25.0
+          # python3 -m pip install -U lit==15.0.7
+
       - name: Show installed libraries and their versions
         run: pip freeze
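The step added above pins torch 2.1.0 (cu118 wheels) plus specific accelerate and peft commits before the doctest job runs, and the `pip freeze` step records what actually got installed. As a hedged illustration only (this snippet is not part of the diff), a quick sanity check along the same lines could look like:

```python
# Hypothetical sanity check, not part of the workflow above: confirm the pinned
# torch build and the git-installed accelerate/peft were actually picked up.
import accelerate
import peft
import torch

print("torch:", torch.__version__)  # expected to report a 2.1.0+cu118 build
print("accelerate:", accelerate.__version__)
print("peft:", peft.__version__)
print("CUDA available:", torch.cuda.is_available())
```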

View File

@@ -297,9 +297,14 @@ class Blip2PreTrainedModel(PreTrainedModel):
         elif isinstance(module, nn.Linear) and module.bias is not None:
             module.bias.data.zero_()
 
-    def _set_gradient_checkpointing(self, module, value=False):
-        if isinstance(module, Blip2Encoder):
-            module.gradient_checkpointing = value
+    def _set_gradient_checkpointing(self, module, gradient_checkpointing_func=None):
+        if isinstance(module, (Blip2Encoder, Blip2QFormerEncoder)):
+            module.gradient_checkpointing_func = gradient_checkpointing_func
+            module.gradient_checkpointing = gradient_checkpointing_func is not None
+
+        # Enable / disable GC for the language model as well
+        if hasattr(self, "language_model") and hasattr(self.language_model, "_set_gradient_checkpointing"):
+            self.language_model._set_gradient_checkpointing(module, gradient_checkpointing_func)
 
 
 BLIP_2_START_DOCSTRING = r"""
@@ -473,17 +478,11 @@ class Blip2Encoder(nn.Module):
             if output_hidden_states:
                 encoder_states = encoder_states + (hidden_states,)
             if self.gradient_checkpointing and self.training:
-
-                def create_custom_forward(module):
-                    def custom_forward(*inputs):
-                        return module(*inputs, output_attentions)
-
-                    return custom_forward
-
-                layer_outputs = torch.utils.checkpoint.checkpoint(
-                    create_custom_forward(encoder_layer),
+                layer_outputs = self.gradient_checkpointing_func(
+                    encoder_layer.__call__,
                     hidden_states,
                     attention_mask,
+                    output_attentions,
                 )
             else:
                 layer_outputs = encoder_layer(
@@ -944,15 +943,8 @@ class Blip2QFormerEncoder(nn.Module):
                         "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                     )
                     use_cache = False
-
-                def create_custom_forward(module):
-                    def custom_forward(*inputs):
-                        return module(*inputs, past_key_value, output_attentions, query_length)
-
-                    return custom_forward
-
-                layer_outputs = torch.utils.checkpoint.checkpoint(
-                    create_custom_forward(layer_module),
+                layer_outputs = self.gradient_checkpointing_func(
+                    layer_module.__call__,
                     hidden_states,
                     attention_mask,
                     layer_head_mask,
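Both `Blip2Encoder` and `Blip2QFormerEncoder` now call the stored `gradient_checkpointing_func` with the layer's `__call__` and its positional arguments. In practice the callable is supplied through the public `PreTrainedModel` API; a short usage sketch, not part of this diff (it downloads the same checkpoint used elsewhere in these examples):

```python
# Usage sketch: enable gradient checkpointing on BLIP-2 through the public API,
# which hands a checkpointing callable down to _set_gradient_checkpointing.
from transformers import Blip2ForConditionalGeneration

model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
model.gradient_checkpointing_enable()  # wires the callable into the vision encoder, Q-Former and language model
model.train()
```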
@@ -1272,14 +1264,10 @@ class Blip2Model(Blip2PreTrainedModel):
         >>> import torch
         >>> from transformers import AutoTokenizer, Blip2Model
 
-        >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
-        >>> model.to(device)  # doctest: +IGNORE_RESULT
+        >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
 
         >>> tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b")
-
-        >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt").to(device)
+        >>> inputs = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")
         >>> text_features = model.get_text_features(**inputs)
         ```"""
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1333,16 +1321,12 @@ class Blip2Model(Blip2PreTrainedModel):
         >>> import requests
         >>> from transformers import AutoProcessor, Blip2Model
 
-        >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
-        >>> model.to(device)  # doctest: +IGNORE_RESULT
+        >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
 
         >>> processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
-
         >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
-        >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+        >>> inputs = processor(images=image, return_tensors="pt")
         >>> image_outputs = model.get_image_features(**inputs)
         ```"""
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1381,15 +1365,12 @@ class Blip2Model(Blip2PreTrainedModel):
         >>> import requests
         >>> from transformers import Blip2Processor, Blip2Model
 
-        >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
         >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-        >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
-        >>> model.to(device)  # doctest: +IGNORE_RESULT
+        >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")
 
         >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
-        >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+        >>> inputs = processor(images=image, return_tensors="pt")
         >>> qformer_outputs = model.get_qformer_features(**inputs)
         ```"""
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1654,34 +1635,7 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
         Examples:
 
-        Image captioning (without providing a text prompt):
-
-        ```python
-        >>> from PIL import Image
-        >>> import requests
-        >>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
-        >>> import torch
-
-        >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-        >>> model = Blip2ForConditionalGeneration.from_pretrained(
-        ...     "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
-        ... )
-        >>> model.to(device)  # doctest: +IGNORE_RESULT
-
-        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-        >>> image = Image.open(requests.get(url, stream=True).raw)
-
-        >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
-
-        >>> generated_ids = model.generate(**inputs)
-        >>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-        >>> print(generated_text)
-        two cats laying on a couch
-        ```
-
-        Visual question answering (prompt = question):
+        Prepare processor, model and image input
 
         ```python
         >>> from PIL import Image
@@ -1698,7 +1652,22 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
         >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)
+        ```
 
+        Image captioning (without providing a text prompt):
+
+        ```python
+        >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+
+        >>> generated_ids = model.generate(**inputs)
+        >>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+        >>> print(generated_text)
+        two cats laying on a couch
+        ```
+
+        Visual question answering (prompt = question):
+
+        ```python
         >>> prompt = "Question: how many cats are there? Answer:"
         >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.float16)
@@ -1712,20 +1681,10 @@ class Blip2ForConditionalGeneration(Blip2PreTrainedModel):
         This greatly reduces the amount of memory used by the model while maintaining the same performance.
 
         ```python
-        >>> from PIL import Image
-        >>> import requests
-        >>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
-        >>> import torch
-
-        >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
         >>> model = Blip2ForConditionalGeneration.from_pretrained(
-        ...     "Salesforce/blip2-flan-t5-xl", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
+        ...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
         ... )  # doctest: +IGNORE_RESULT
 
-        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-        >>> image = Image.open(requests.get(url, stream=True).raw)
-
         >>> prompt = "Question: how many cats are there? Answer:"
         >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)
         >>> generated_ids = model.generate(**inputs)

View File

@@ -16,7 +16,7 @@
 import argparse
 
-import bros  # original repo
+# import bros  # original repo
 import torch
 
 from transformers import BrosConfig, BrosModel, BrosProcessor

View File

@@ -1,3 +1,6 @@
+src/transformers/models/bros/convert_bros_to_pytorch.py
+src/transformers/models/persimmon/modeling_persimmon.py
+docs/source/en/tasks/idefics.md
 docs/source/en/_config.py
 docs/source/en/accelerate.md
 docs/source/en/add_new_model.md