Adapt and test huggingface_hub v1.0.0 (#40889)

* Adapt and test huggingface_hub v1.0.0.rc0

* forgot to bump hfh

* bump

* code quality

* code quality

* relax dependency table

* fix has_file

* install hfh 1.0.0.rc0 in circle ci jobs

* repository

* push to hub now returns a commit url

* catch HfHubHTTPError

* check commit on branch

* add it back

* fix ?

* remove deprecated test

* uncomment another test

* trigger

* no proxies

* many more small changes

* fix load PIL Image from httpx

* require 1.0.0.rc0

* fix mocked tests

* fix others

* unchange

* unchange

* args

* Update .circleci/config.yml

* Bump to 1.0.0.rc1

* bump kernels version

* fix deps
Authored by Lucain on 2025-09-25 13:13:50 +02:00, committed by GitHub
parent 750dd2a401
commit 44682e7131
38 changed files with 142 additions and 187 deletions

View File

@ -114,12 +114,12 @@ _deps = [
"GitPython<3.1.19",
"hf-doc-builder>=0.3.0",
"hf_xet",
"huggingface-hub>=0.34.0,<1.0",
"huggingface-hub==1.0.0.rc1",
"importlib_metadata",
"ipadic>=1.0.0,<2.0",
"jinja2>=3.1.0",
"kenlm",
"kernels>=0.6.1,<=0.9",
"kernels>=0.10.2,<0.11",
"librosa",
"natten>=0.14.6,<0.15.0",
"nltk<=3.8.1",

View File

@ -25,11 +25,8 @@ from collections.abc import Sequence
from io import BytesIO
from typing import TYPE_CHECKING, Any, Optional, Union
if TYPE_CHECKING:
import torch
import httpx
import numpy as np
import requests
from packaging import version
from .utils import (
@ -42,6 +39,9 @@ from .utils import (
)
if TYPE_CHECKING:
import torch
if is_soundfile_available():
import soundfile as sf
@ -132,7 +132,9 @@ def load_audio_librosa(audio: Union[str, np.ndarray], sampling_rate=16000, timeo
# Load audio from URL (e.g https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/translate_to_chinese.wav)
if audio.startswith("http://") or audio.startswith("https://"):
audio = librosa.load(BytesIO(requests.get(audio, timeout=timeout).content), sr=sampling_rate)[0]
audio = librosa.load(
BytesIO(httpx.get(audio, follow_redirects=True, timeout=timeout).content), sr=sampling_rate
)[0]
elif os.path.isfile(audio):
audio = librosa.load(audio, sr=sampling_rate)[0]
return audio
@ -174,7 +176,7 @@ def load_audio_as(
# Load audio bytes from URL or file
audio_bytes = None
if audio.startswith(("http://", "https://")):
response = requests.get(audio, timeout=timeout)
response = httpx.get(audio, follow_redirects=True, timeout=timeout)
response.raise_for_status()
audio_bytes = response.content
elif os.path.isfile(audio):
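The substitution above recurs throughout the commit, and it hides one behavioral difference: httpx does not follow redirects by default, whereas requests does, which is why every migrated call gains follow_redirects=True. A minimal sketch of the pattern (the helper name is illustrative, not from the commit):

    from io import BytesIO

    import httpx


    def fetch_bytes(url: str, timeout: float = 10.0) -> BytesIO:
        # Unlike requests, httpx does not follow redirects unless asked to;
        # Hub URLs often 302 to a CDN, hence follow_redirects=True everywhere.
        response = httpx.get(url, follow_redirects=True, timeout=timeout)
        response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
        return BytesIO(response.content)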

View File

@ -23,12 +23,12 @@ deps = {
"GitPython": "GitPython<3.1.19",
"hf-doc-builder": "hf-doc-builder>=0.3.0",
"hf_xet": "hf_xet",
"huggingface-hub": "huggingface-hub>=0.34.0,<1.0",
"huggingface-hub": "huggingface-hub==1.0.0.rc1",
"importlib_metadata": "importlib_metadata",
"ipadic": "ipadic>=1.0.0,<2.0",
"jinja2": "jinja2>=3.1.0",
"kenlm": "kenlm",
"kernels": "kernels>=0.6.1,<=0.9",
"kernels": "kernels>=0.10.2,<0.11",
"librosa": "librosa",
"natten": "natten>=0.14.6,<0.15.0",
"nltk": "nltk<=3.8.1",

View File

@ -17,9 +17,6 @@ File utilities: utilities related to download and cache models
This module should not be update anymore and is only left for backward compatibility.
"""
from huggingface_hub import get_full_repo_name # for backward compatibility
from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEMETRY # for backward compatibility
from . import __version__
# Backward compatibility imports, to make sure all those objects can be found in file_utils

View File

@ -19,8 +19,8 @@ from dataclasses import dataclass
from io import BytesIO
from typing import Optional, Union
import httpx
import numpy as np
import requests
from .utils import (
ExplicitEnum,
@ -462,7 +462,7 @@ def load_image(image: Union[str, "PIL.Image.Image"], timeout: Optional[float] =
if image.startswith("http://") or image.startswith("https://"):
# We need to actually check for a real protocol, otherwise it's impossible to use a local file
# like http_huggingface_co.png
image = PIL.Image.open(BytesIO(requests.get(image, timeout=timeout).content))
image = PIL.Image.open(BytesIO(httpx.get(image, timeout=timeout, follow_redirects=True).content))
elif os.path.isfile(image):
image = PIL.Image.open(image)
else:
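The load_image change above also covers the "fix load PIL Image from httpx" bullet: requests responses exposed a file-like .raw attribute that PIL.Image.open could read directly, and httpx has no equivalent, so the commit buffers response.content in BytesIO instead. A hedged sketch of the new idiom:

    import io

    import httpx
    from PIL import Image


    def load_pil_image(url: str) -> Image.Image:
        # Image.open(requests.get(url, stream=True).raw) has no httpx analogue;
        # buffer the downloaded bytes and hand PIL a seekable file object.
        data = httpx.get(url, follow_redirects=True).content
        return Image.open(io.BytesIO(data))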

View File

@ -21,7 +21,7 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional, Union
import requests
import httpx
import yaml
from huggingface_hub import model_info
from huggingface_hub.errors import OfflineModeIsEnabled
@ -380,12 +380,7 @@ class TrainingSummary:
for tag in info.tags:
if tag.startswith("license:"):
self.license = tag[8:]
except (
requests.exceptions.HTTPError,
requests.exceptions.ConnectionError,
HFValidationError,
OfflineModeIsEnabled,
):
except (httpx.HTTPError, HFValidationError, OfflineModeIsEnabled):
pass
def create_model_index(self, metric_mapping):

View File

@ -14,8 +14,8 @@
import subprocess
from typing import Any, Union
import httpx
import numpy as np
import requests
from ..utils import add_end_docstrings, is_torch_available, is_torchaudio_available, is_torchcodec_available, logging
from .base import Pipeline, build_pipeline_init_args
@ -168,7 +168,7 @@ class AudioClassificationPipeline(Pipeline):
if inputs.startswith("http://") or inputs.startswith("https://"):
# We need to actually check for a real protocol, otherwise it's impossible to use a local file
# like http_huggingface_co.png
inputs = requests.get(inputs).content
inputs = httpx.get(inputs, follow_redirects=True).content
else:
with open(inputs, "rb") as f:
inputs = f.read()

View File

@ -14,8 +14,8 @@
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Optional, Union
import httpx
import numpy as np
import requests
from ..generation import GenerationConfig
from ..tokenization_utils import PreTrainedTokenizer
@ -355,7 +355,7 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
if inputs.startswith("http://") or inputs.startswith("https://"):
# We need to actually check for a real protocol, otherwise it's impossible to use a local file
# like http_huggingface_co.png
inputs = requests.get(inputs).content
inputs = httpx.get(inputs, follow_redirects=True).content
else:
with open(inputs, "rb") as f:
inputs = f.read()

View File

@ -46,12 +46,13 @@ class ImageToImagePipeline(Pipeline):
```python
>>> from PIL import Image
>>> import requests
>>> import httpx
>>> import io
>>> from transformers import pipeline
>>> upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64")
>>> img = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
>>> img = Image.open(io.BytesIO(httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg").content))
>>> img = img.resize((64, 64))
>>> upscaled_img = upscaler(img)
>>> img.size

View File

@ -15,7 +15,7 @@ import warnings
from io import BytesIO
from typing import Any, Optional, Union, overload
import requests
import httpx
from ..utils import (
add_end_docstrings,
@ -142,7 +142,7 @@ class VideoClassificationPipeline(Pipeline):
num_frames = self.model.config.num_frames
if video.startswith("http://") or video.startswith("https://"):
video = BytesIO(requests.get(video).content)
video = BytesIO(httpx.get(video, follow_redirects=True).content)
container = av.open(video)

View File

@ -15,8 +15,8 @@
from collections import UserDict
from typing import Any, Union
import httpx
import numpy as np
import requests
from ..utils import (
add_end_docstrings,
@ -107,7 +107,7 @@ class ZeroShotAudioClassificationPipeline(Pipeline):
if audio.startswith("http://") or audio.startswith("https://"):
# We need to actually check for a real protocol, otherwise it's impossible to use a local file
# like http_huggingface_co.png
audio = requests.get(audio).content
audio = httpx.get(audio, follow_redirects=True).content
else:
with open(audio, "rb") as f:
audio = f.read()

View File

@ -1,6 +1,6 @@
from typing import Optional
import requests
import httpx
from huggingface_hub import Discussion, HfApi, get_repo_discussions
from .utils import cached_file, http_user_agent, logging
@ -44,10 +44,10 @@ def spawn_conversion(token: str, private: bool, model_id: str):
data = {"data": [model_id, private, token]}
result = requests.post(sse_url, stream=True, json=data).json()
result = httpx.post(sse_url, follow_redirects=True, json=data).json()
event_id = result["event_id"]
with requests.get(f"{sse_url}/{event_id}", stream=True) as sse_connection:
with httpx.stream("GET", f"{sse_url}/{event_id}") as sse_connection:
try:
logger.debug("Spawning safetensors automatic conversion.")
start(sse_connection)
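requests expressed streaming as a flag on the call (stream=True); httpx uses a dedicated context manager, which is what the SSE connection above switches to. A rough sketch of the consumption side (URL illustrative, event parsing omitted):

    import httpx

    # httpx.stream(...) replaces requests.get(..., stream=True): the response
    # body is only readable inside the with-block, via iter_lines()/iter_bytes().
    with httpx.stream("GET", "https://example.com/events") as response:
        for line in response.iter_lines():
            print(line)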

View File

@ -45,10 +45,9 @@ from typing import Any, Callable, Optional, Union
from unittest import mock
from unittest.mock import patch
import huggingface_hub.utils
import requests
import httpx
import urllib3
from huggingface_hub import delete_repo
from huggingface_hub import create_repo, delete_repo
from packaging import version
from transformers import Trainer
@ -1848,7 +1847,7 @@ class TemporaryHubRepo:
repo_id = Path(tmp_dir).name
if namespace is not None:
repo_id = f"{namespace}/{repo_id}"
self.repo_url = huggingface_hub.create_repo(repo_id, token=self.token)
self.repo_url = create_repo(repo_id, token=self.token)
def __enter__(self):
return self.repo_url
@ -2660,13 +2659,14 @@ def hub_retry(max_attempts: int = 5, wait_before_retry: Optional[float] = 2):
while retry_count < max_attempts:
try:
return test_func_ref(*args, **kwargs)
# We catch all exceptions related to network issues from requests
# We catch all exceptions related to network issues from httpx
except (
requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
requests.exceptions.ReadTimeout,
requests.exceptions.HTTPError,
requests.exceptions.RequestException,
httpx.HTTPError,
httpx.RequestError,
httpx.TimeoutException,
httpx.ReadTimeout,
httpx.ConnectError,
httpx.NetworkError,
) as err:
logger.error(
f"Test failed with {err} at try {retry_count}/{max_attempts} as it couldn't connect to the specified Hub repository."

View File

@ -49,7 +49,7 @@ import huggingface_hub.utils as hf_hub_utils
import numpy as np
import torch
import torch.distributed as dist
from huggingface_hub import ModelCard, create_repo, upload_folder
from huggingface_hub import CommitInfo, ModelCard, create_repo, upload_folder
from packaging import version
from torch import nn
from torch.utils.data import DataLoader, Dataset, IterableDataset, RandomSampler, SequentialSampler
@ -5117,7 +5117,7 @@ class Trainer:
token: Optional[str] = None,
revision: Optional[str] = None,
**kwargs,
) -> str:
) -> CommitInfo:
"""
Upload `self.model` and `self.processing_class` to the 🤗 model hub on the repo `self.args.hub_model_id`.

View File

@ -16,8 +16,6 @@
from functools import lru_cache
from huggingface_hub import get_full_repo_name # for backward compatibility
from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEMETRY # for backward compatibility
from packaging import version
from .. import __version__

View File

@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import requests
import httpx
from PIL import Image
from ..masking_utils import create_causal_mask
@ -180,7 +180,7 @@ class AttentionMaskVisualizer:
image_seq_length = None
if self.config.model_type in PROCESSOR_MAPPING_NAMES:
img = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=true"
img = Image.open(requests.get(img, stream=True).raw)
img = Image.open(io.BytesIO(httpx.get(img, follow_redirects=True).content))
image_seq_length = 5
processor = AutoProcessor.from_pretrained(self.repo_id, image_seq_length=image_seq_length)
if hasattr(processor, "image_token"):

View File

@ -27,8 +27,8 @@ from typing import Optional, Union
from urllib.parse import urlparse
from uuid import uuid4
import httpx
import huggingface_hub
import requests
from huggingface_hub import (
_CACHED_NO_EXIST,
CommitOperationAdd,
@ -58,7 +58,6 @@ from huggingface_hub.utils import (
hf_raise_for_status,
send_telemetry,
)
from requests.exceptions import HTTPError
from . import __version__, logging
from .generic import working_or_temp_dir
@ -176,7 +175,7 @@ def list_repo_templates(
]
except (GatedRepoError, RepositoryNotFoundError, RevisionNotFoundError):
raise # valid errors => do not catch
except (HTTPError, OfflineModeIsEnabled, requests.exceptions.ConnectionError):
except (HfHubHTTPError, OfflineModeIsEnabled, httpx.NetworkError):
pass # offline mode, internet down, etc. => try local files
# check local files
@ -199,7 +198,7 @@ def is_remote_url(url_or_filename):
def define_sagemaker_information():
try:
instance_data = requests.get(os.environ["ECS_CONTAINER_METADATA_URI"]).json()
instance_data = httpx.get(os.environ["ECS_CONTAINER_METADATA_URI"]).json()
dlc_container_used = instance_data["Image"]
dlc_tag = instance_data["Image"].split(":")[1]
except Exception:
@ -554,7 +553,7 @@ def cached_files(
) from e
# snapshot_download will not raise EntryNotFoundError, but hf_hub_download can. If this is the case, it will be treated
# later on anyway and re-raised if needed
elif isinstance(e, HTTPError) and not isinstance(e, EntryNotFoundError):
elif isinstance(e, HfHubHTTPError) and not isinstance(e, EntryNotFoundError):
if not _raise_exceptions_for_connection_errors:
return None
raise OSError(f"There was a specific connection error when trying to load {path_or_repo_id}:\n{e}") from e
@ -677,18 +676,13 @@ def has_file(
response = get_session().head(
hf_hub_url(path_or_repo, filename=filename, revision=revision, repo_type=repo_type),
headers=build_hf_headers(token=token, user_agent=http_user_agent()),
allow_redirects=False,
proxies=proxies,
follow_redirects=False,
timeout=10,
)
except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
except httpx.ProxyError:
# Actually raise for those subclasses of ConnectionError
raise
except (
requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
OfflineModeIsEnabled,
):
except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled):
return has_file_in_cache
try:
@ -712,7 +706,7 @@ def has_file(
) from e
except EntryNotFoundError:
return False # File does not exist
except requests.HTTPError:
except HfHubHTTPError:
# Any authentication/authorization error will be caught here => default to cache
return has_file_in_cache
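For has_file, the keyword rename (allow_redirects= to follow_redirects=) and the dropped per-request proxies= both follow from httpx's design: proxies are configured on the client, not per call, which matches the "no proxies" log entry. A hedged summary of the exception mapping plus the HEAD idiom:

    import httpx

    # Rough requests -> httpx correspondence used above (hedged, not exhaustive):
    #   requests.exceptions.ConnectionError -> httpx.ConnectError / httpx.NetworkError
    #   requests.exceptions.Timeout         -> httpx.TimeoutException
    #   requests.exceptions.ProxyError      -> httpx.ProxyError
    #   requests.HTTPError                  -> HfHubHTTPError (huggingface_hub)
    #                                          or httpx.HTTPStatusError


    def head_status(url: str) -> int:
        # allow_redirects=False (requests) becomes follow_redirects=False (httpx);
        # a 302 answer is still proof that the file exists on the Hub.
        return httpx.head(url, follow_redirects=False, timeout=10).status_code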

View File

@ -22,8 +22,8 @@ from io import BytesIO
from typing import Callable, NewType, Optional, Union
from urllib.parse import urlparse
import httpx
import numpy as np
import requests
from .image_transforms import PaddingMode, to_channel_dimension_format
from .image_utils import ChannelDimension, infer_channel_dimension_format, is_valid_image
@ -683,7 +683,7 @@ def load_video(
bytes_obj = buffer.getvalue()
file_obj = BytesIO(bytes_obj)
elif video.startswith("http://") or video.startswith("https://"):
file_obj = BytesIO(requests.get(video).content)
file_obj = BytesIO(httpx.get(video, follow_redirects=True).content)
elif os.path.isfile(video):
file_obj = video
else:

View File

@ -19,7 +19,7 @@ from threading import Thread
from unittest.mock import patch
import aiohttp.client_exceptions
import requests
import httpx
from huggingface_hub import AsyncInferenceClient, ChatCompletionStreamOutput
from parameterized import parameterized
@ -509,17 +509,18 @@ def _call_healthcheck(base_url: str):
retries = 10
while retries > 0:
try:
response = requests.get(f"{base_url}/health")
response = httpx.get(f"{base_url}/health")
break
except requests.exceptions.ConnectionError:
except httpx.NetworkError:
time.sleep(0.1)
retries -= 1
return response
def _open_stream_and_cancel(base_url: str, request_id: str):
with requests.Session() as s:
with s.post(
with httpx.Client() as s:
with s.stream(
"POST",
f"{base_url}/v1/chat/completions",
headers={"X-Request-ID": request_id},
json={
@ -527,13 +528,12 @@ def _open_stream_and_cancel(base_url: str, request_id: str):
"stream": True,
"messages": [{"role": "user", "content": "Count slowly so I can cancel you."}],
},
stream=True,
timeout=30,
) as resp:
assert resp.status_code == 200
wait_for_n_chunks = 3
for i, _ in enumerate(resp.iter_content(chunk_size=None)):
for i, _ in enumerate(resp.iter_bytes(chunk_size=None)):
if i >= wait_for_n_chunks:
resp.close()
break
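The serving test shows the session-level equivalents: requests.Session() becomes httpx.Client(), stream=True becomes a .stream(method, url, ...) context manager, and iter_content() becomes iter_bytes(). A condensed sketch (endpoint and payload illustrative):

    import httpx

    with httpx.Client(timeout=30) as client:
        # The response object is only usable inside the stream() context.
        with client.stream(
            "POST",
            "https://example.com/v1/chat/completions",
            json={"stream": True, "messages": [{"role": "user", "content": "hi"}]},
        ) as resp:
            for i, _chunk in enumerate(resp.iter_bytes()):
                if i >= 3:
                    break  # leaving the context cancels the rest of the stream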

View File

@ -19,7 +19,7 @@ import tempfile
import unittest
import warnings
from huggingface_hub import HfFolder, create_pull_request
from huggingface_hub import create_pull_request
from parameterized import parameterized
from transformers import AutoConfig, GenerationConfig, WatermarkingConfig, is_torch_available
@ -688,7 +688,6 @@ class ConfigPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:

View File

@ -13,9 +13,6 @@
# limitations under the License.
import copy
import os
import os.path
import shutil
import sys
import tempfile
import unittest
@ -23,7 +20,6 @@ from collections import OrderedDict
from pathlib import Path
import pytest
from huggingface_hub import Repository
import transformers
from transformers import BertConfig, GPT2Model, is_safetensors_available, is_torch_available
@ -42,7 +38,6 @@ from ..bert.test_modeling_bert import BertModelTester
sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils"))
from test_module.custom_configuration import CustomConfig # noqa E402
from utils.fetch_hub_objects_for_ci import url_to_local_path
if is_torch_available():
@ -562,26 +557,6 @@ class AutoModelTest(unittest.TestCase):
_MODEL_MAPPING = _LazyAutoMapping(_CONFIG_MAPPING_NAMES, _MODEL_MAPPING_NAMES)
self.assertEqual(_MODEL_MAPPING[BertConfig], GPT2Model)
def test_dynamic_saving_from_local_repo(self):
with tempfile.TemporaryDirectory() as tmp_dir, tempfile.TemporaryDirectory() as tmp_dir_out:
# `Repository` is deprecated and will be removed in `huggingface_hub v1.0`.
# TODO: Remove this test when this comes.
# Here is a ugly approach to avoid `too many requests`
repo_id = url_to_local_path("hf-internal-testing/tiny-random-custom-architecture")
if os.path.isdir(repo_id):
shutil.copytree(repo_id, tmp_dir, dirs_exist_ok=True)
else:
_ = Repository(
local_dir=tmp_dir,
clone_from=url_to_local_path("hf-internal-testing/tiny-random-custom-architecture"),
)
model = AutoModelForCausalLM.from_pretrained(tmp_dir, trust_remote_code=True)
model.save_pretrained(tmp_dir_out)
_ = AutoModelForCausalLM.from_pretrained(tmp_dir_out, trust_remote_code=True)
self.assertTrue((Path(tmp_dir_out) / "modeling_fake_custom.py").is_file())
self.assertTrue((Path(tmp_dir_out) / "configuration_fake_custom.py").is_file())
def test_custom_model_patched_generation_inheritance(self):
"""
Tests that our inheritance patching for generate-compatible models works as expected. Without this feature,

View File

@ -20,7 +20,7 @@ import unittest
from pathlib import Path
from shutil import copyfile
from huggingface_hub import HfFolder, Repository
from huggingface_hub import snapshot_download, upload_folder
import transformers
from transformers import (
@ -423,7 +423,6 @@ class ProcessorPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub_via_save_pretrained(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:
@ -471,7 +470,7 @@ class ProcessorPushToHubTester(unittest.TestCase):
processor = CustomProcessor(feature_extractor, tokenizer)
with tempfile.TemporaryDirectory() as tmp_dir:
repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token)
snapshot_download(tmp_repo.repo_id, token=self._token)
processor.save_pretrained(tmp_dir)
# This has added the proper auto_map field to the feature extractor config
@ -499,7 +498,7 @@ class ProcessorPushToHubTester(unittest.TestCase):
self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_tokenization.py")))
self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_processing.py")))
repo.push_to_hub()
upload_folder(repo_id=tmp_repo.repo_id, folder_path=tmp_dir, token=self._token)
new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
# Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module

View File

@ -21,8 +21,8 @@ import unittest
from pathlib import Path
import datasets
from huggingface_hub import HfFolder, Repository, delete_repo
from requests.exceptions import HTTPError
from huggingface_hub import delete_repo, snapshot_download
from huggingface_hub.errors import HfHubHTTPError
from transformers import (
AutomaticSpeechRecognitionPipeline,
@ -209,7 +209,7 @@ class CommonPipelineTest(unittest.TestCase):
@require_torch
def test_auto_model_pipeline_registration_from_local_dir(self):
with tempfile.TemporaryDirectory() as tmp_dir:
_ = Repository(local_dir=tmp_dir, clone_from="hf-internal-testing/tiny-random-custom-architecture")
snapshot_download("hf-internal-testing/tiny-random-custom-architecture", local_dir=tmp_dir)
pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)
self.assertIsInstance(pipe, TextGenerationPipeline) # Assert successful load
@ -874,13 +874,12 @@ class DynamicPipelineTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@classmethod
def tearDownClass(cls):
try:
delete_repo(token=cls._token, repo_id="test-dynamic-pipeline")
except HTTPError:
except HfHubHTTPError:
pass
@unittest.skip("Broken, TODO @Yih-Dar")
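Repository (the deprecated git-clone wrapper) is gone in huggingface_hub v1.0; where tests only need a working tree of a repo, snapshot_download with local_dir= is the HTTP-based replacement, as the first hunk above shows. A sketch (target path illustrative):

    from huggingface_hub import snapshot_download

    # Downloads every file of the repo into local_dir and returns that path,
    # with no git clone involved.
    local_path = snapshot_download(
        "hf-internal-testing/tiny-random-custom-architecture",
        local_dir="/tmp/tiny-random-custom-architecture",
    )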

View File

@ -11,13 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import tempfile
import unittest
import datasets
import httpx
import numpy as np
import requests
from datasets import load_dataset
from huggingface_hub import ImageSegmentationOutputElement
from huggingface_hub.utils import insecure_hashlib
@ -318,7 +318,9 @@ class ImageSegmentationPipelineTests(unittest.TestCase):
]
# actual links to get files
expected_masks = [x.replace("/blob/", "/resolve/") for x in expected_masks]
expected_masks = [Image.open(requests.get(image, stream=True).raw) for image in expected_masks]
expected_masks = [
Image.open(io.BytesIO(httpx.get(image, follow_redirects=True).content)) for image in expected_masks
]
# Convert masks to numpy array
output_masks = [np.array(x) for x in output_masks]

View File

@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import unittest
import requests
import httpx
from transformers import MODEL_FOR_VISION_2_SEQ_MAPPING, is_vision_available
from transformers.pipelines import ImageToTextPipeline, pipeline
@ -172,7 +172,7 @@ class ImageToTextPipelineTests(unittest.TestCase):
def test_generation_pt_blip(self):
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png"
image = Image.open(requests.get(url, stream=True).raw)
image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
outputs = pipe(image)
self.assertEqual(outputs, [{"generated_text": "a pink pokemon pokemon with a blue shirt and a blue shirt"}])
@ -182,7 +182,7 @@ class ImageToTextPipelineTests(unittest.TestCase):
def test_generation_pt_git(self):
pipe = pipeline("image-to-text", model="microsoft/git-base-coco")
url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png"
image = Image.open(requests.get(url, stream=True).raw)
image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
outputs = pipe(image)
self.assertEqual(outputs, [{"generated_text": "a cartoon of a purple character."}])
@ -192,7 +192,7 @@ class ImageToTextPipelineTests(unittest.TestCase):
def test_conditional_generation_pt_blip(self):
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
prompt = "a photography of"
@ -207,7 +207,7 @@ class ImageToTextPipelineTests(unittest.TestCase):
def test_conditional_generation_pt_git(self):
pipe = pipeline("image-to-text", model="microsoft/git-base-coco")
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
prompt = "a photo of a"
@ -222,7 +222,7 @@ class ImageToTextPipelineTests(unittest.TestCase):
def test_conditional_generation_pt_pix2struct(self):
pipe = pipeline("image-to-text", model="google/pix2struct-ai2d-base")
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))
prompt = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"

View File

@ -11,8 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import io
import json
import os
import pathlib
@ -22,9 +22,9 @@ import unittest
import warnings
from copy import deepcopy
import httpx
import numpy as np
import pytest
import requests
from packaging import version
from transformers import AutoImageProcessor, BatchFeature
@ -182,7 +182,9 @@ class ImageProcessingTestMixin:
self.skipTest(reason="Skipping slow/fast equivalence test as one of the image processors is not defined")
dummy_image = Image.open(
requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw
io.BytesIO(
httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg", follow_redirects=True).content
)
)
image_processor_slow = self.image_processing_class(**self.image_processor_dict)
image_processor_fast = self.fast_image_processing_class(**self.image_processor_dict)

View File

@ -34,7 +34,7 @@ if is_mistral_common_available():
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.tokens.tokenizers.utils import list_local_hf_repo_files
# To avoid unnecessary `requests.get` calls which give us `Error: Too Many Requests for url` on CircleCI
# To avoid unnecessary `httpx.get` calls which give us `Error: Too Many Requests for url` on CircleCI
mistral_common.tokens.tokenizers.image.download_image = load_image

View File

@ -32,7 +32,7 @@ from unittest.mock import Mock, patch
import numpy as np
import pytest
from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files
from huggingface_hub import ModelCard, create_branch, list_repo_commits, list_repo_files
from packaging import version
from parameterized import parameterized
@ -5284,7 +5284,6 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:
@ -5469,14 +5468,10 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
)
branch = "v1.0"
create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True)
url = trainer.push_to_hub(revision=branch)
push_commit = trainer.push_to_hub(revision=branch)
# Extract branch from the url
re_search = re.search(r"tree/([^/]+)/", url)
self.assertIsNotNone(re_search)
branch_name = re_search.groups()[0]
self.assertEqual(branch_name, branch)
commits = list_repo_commits(repo_id=trainer.hub_model_id, revision=branch, token=self._token)
self.assertEqual(commits[0].commit_id, push_commit.oid)
@require_torch
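Since push_to_hub now returns a CommitInfo, the test stops regex-parsing a branch name out of a URL and instead compares the returned commit hash with the branch head. A self-contained sketch of that assertion (assumes a configured Trainer instance):

    from huggingface_hub import CommitInfo, list_repo_commits


    def assert_pushed_to_branch(trainer, branch: str) -> CommitInfo:
        push_commit: CommitInfo = trainer.push_to_hub(revision=branch)
        # The newest commit on the branch should be exactly the one we pushed.
        latest = list_repo_commits(repo_id=trainer.hub_model_id, revision=branch)[0]
        assert latest.commit_id == push_commit.oid
        return push_commit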

View File

@ -21,8 +21,7 @@ import unittest.mock as mock
import warnings
from pathlib import Path
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
import httpx
from transformers import AutoConfig, BertConfig, Florence2Config, GPT2Config
from transformers.configuration_utils import PretrainedConfig
@ -93,7 +92,6 @@ class ConfigPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:
@ -222,14 +220,16 @@ class ConfigTestUtils(unittest.TestCase):
response_mock = mock.Mock()
response_mock.status_code = 500
response_mock.headers = {}
response_mock.raise_for_status.side_effect = HTTPError
response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
"failed", request=mock.Mock(), response=mock.Mock()
)
response_mock.json.return_value = {}
# Download this model to make sure it's in the cache.
_ = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")
# Under the mock environment we get a 500 error when trying to reach the model.
with mock.patch("requests.Session.request", return_value=response_mock) as mock_head:
with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
_ = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")
# This check we did call the fake head request
mock_head.assert_called()
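This mock rewrite repeats in the feature-extractor, image-processor, tokenizer, and modeling tests below; the two moving parts are that httpx.HTTPStatusError must be constructed with explicit request= and response= arguments, and that huggingface_hub v1.x sends traffic through httpx.Client, so that is the patch target. Condensed, as a sketch:

    import unittest.mock as mock

    import httpx

    # A fake 500 response whose raise_for_status() fails the httpx way.
    response_mock = mock.Mock()
    response_mock.status_code = 500
    response_mock.headers = {}
    response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
        "failed", request=mock.Mock(), response=mock.Mock()
    )
    response_mock.json.return_value = {}

    # requests.Session.request is no longer on the code path; patch httpx instead.
    with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
        pass  # from_pretrained(...) calls under this patch hit the fake 500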

View File

@ -19,8 +19,7 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
import httpx
from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test
@ -40,13 +39,15 @@ class FeatureExtractorUtilTester(unittest.TestCase):
response_mock = mock.Mock()
response_mock.status_code = 500
response_mock.headers = {}
response_mock.raise_for_status.side_effect = HTTPError
response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
"failed", request=mock.Mock(), response=mock.Mock()
)
response_mock.json.return_value = {}
# Download this model to make sure it's in the cache.
_ = Wav2Vec2FeatureExtractor.from_pretrained("hf-internal-testing/tiny-random-wav2vec2")
# Under the mock environment we get a 500 error when trying to reach the model.
with mock.patch("requests.Session.request", return_value=response_mock) as mock_head:
with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
_ = Wav2Vec2FeatureExtractor.from_pretrained("hf-internal-testing/tiny-random-wav2vec2")
# This check we did call the fake head request
mock_head.assert_called()
@ -57,7 +58,6 @@ class FeatureExtractorPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:

View File

@ -19,8 +19,7 @@ import unittest.mock as mock
from pathlib import Path
from huggingface_hub import hf_hub_download
from huggingface_hub.errors import LocalEntryNotFoundError, OfflineModeIsEnabled
from requests.exceptions import HTTPError
from huggingface_hub.errors import HfHubHTTPError, LocalEntryNotFoundError, OfflineModeIsEnabled
from transformers.utils import (
CONFIG_NAME,
@ -87,7 +86,10 @@ class GetFromCacheTests(unittest.TestCase):
self.assertIsNone(path)
# Under the mock environment, hf_hub_download will always raise an HTTPError
with mock.patch("transformers.utils.hub.hf_hub_download", side_effect=HTTPError) as mock_head:
with mock.patch(
"transformers.utils.hub.hf_hub_download",
side_effect=HfHubHTTPError("failed", response=mock.Mock(status_code=404)),
) as mock_head:
path = cached_file(RANDOM_BERT, "conf", _raise_exceptions_for_connection_errors=False)
self.assertIsNone(path)
# This check we did call the fake head request

View File

@ -18,8 +18,7 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder
from requests.exceptions import HTTPError
import httpx
from transformers import AutoImageProcessor, ViTImageProcessor, ViTImageProcessorFast
from transformers.image_processing_utils import get_size_dict
@ -40,7 +39,9 @@ class ImageProcessorUtilTester(unittest.TestCase):
response_mock = mock.Mock()
response_mock.status_code = 500
response_mock.headers = {}
response_mock.raise_for_status.side_effect = HTTPError
response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
"failed", request=mock.Mock(), response=mock.Mock()
)
response_mock.json.return_value = {}
# Download this model to make sure it's in the cache.
@ -48,7 +49,7 @@ class ImageProcessorUtilTester(unittest.TestCase):
_ = ViTImageProcessorFast.from_pretrained("hf-internal-testing/tiny-random-vit")
# Under the mock environment we get a 500 error when trying to reach the model.
with mock.patch("requests.Session.request", return_value=response_mock) as mock_head:
with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
_ = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit")
_ = ViTImageProcessorFast.from_pretrained("hf-internal-testing/tiny-random-vit")
# This check we did call the fake head request
@ -71,7 +72,6 @@ class ImageProcessorPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:

View File

@ -19,11 +19,10 @@ import unittest
from io import BytesIO
from typing import Optional
import httpx
import numpy as np
import pytest
import requests
from huggingface_hub.file_download import hf_hub_url, http_get
from requests import ConnectTimeout, ReadTimeout
from tests.pipelines.test_pipelines_document_question_answering import INVOICE_URL
from transformers import is_torch_available, is_vision_available
@ -49,7 +48,7 @@ if is_vision_available():
def get_image_from_hub_dataset(dataset_id: str, filename: str, revision: Optional[str] = None) -> "PIL.Image.Image":
url = hf_hub_url(dataset_id, filename, repo_type="dataset", revision=revision)
return PIL.Image.open(BytesIO(requests.get(url).content))
return PIL.Image.open(BytesIO(httpx.get(url, follow_redirects=True).content))
def get_random_image(height, width):
@ -727,7 +726,7 @@ class LoadImageTester(unittest.TestCase):
@is_flaky()
def test_load_img_url_timeout(self):
with self.assertRaises((ReadTimeout, ConnectTimeout)):
with self.assertRaises(httpx.ConnectTimeout):
load_image(INVOICE_URL, timeout=0.001)
def test_load_img_local(self):

View File

@ -27,12 +27,11 @@ import uuid
import warnings
from pathlib import Path
import httpx
import pytest
import requests
from huggingface_hub import HfApi, HfFolder, split_torch_state_dict_into_shards
from huggingface_hub import HfApi, split_torch_state_dict_into_shards
from parameterized import parameterized
from pytest import mark
from requests.exceptions import HTTPError
from transformers import (
AutoConfig,
@ -419,7 +418,7 @@ class ModelUtilsTest(TestCasePlus):
# First attempt will fail with a connection error
if not hasattr(test_func, "attempt"):
test_func.attempt = 1
raise requests.exceptions.ConnectionError("Connection failed")
raise httpx.ConnectError("Connection failed")
# Second attempt will succeed
return True
@ -1172,14 +1171,16 @@ class ModelUtilsTest(TestCasePlus):
response_mock = mock.Mock()
response_mock.status_code = 500
response_mock.headers = {}
response_mock.raise_for_status.side_effect = HTTPError
response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
"failed", request=mock.Mock(), response=mock.Mock()
)
response_mock.json.return_value = {}
# Download this model to make sure it's in the cache.
_ = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")
# Under the mock environment we get a 500 error when trying to reach the model.
with mock.patch("requests.Session.request", return_value=response_mock) as mock_head:
with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
_ = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert")
# This check we did call the fake head request
mock_head.assert_called()
@ -2205,10 +2206,7 @@ class ModelOnTheFlyConversionTester(unittest.TestCase):
initial_model = BertModel(config)
initial_model.push_to_hub(self.repo_name, token=self.token, safe_serialization=False)
headers = {"Authorization": f"Bearer {self.token}"}
requests.put(
f"https://huggingface.co/api/models/{self.repo_name}/settings", json={"gated": "auto"}, headers=headers
)
self.api.update_repo_settings(self.repo_name, gated="auto")
converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)
with self.subTest("Initial and converted models are equal"):
@ -2269,7 +2267,7 @@ class ModelOnTheFlyConversionTester(unittest.TestCase):
initial_model.push_to_hub(self.repo_name, token=self.token, max_shard_size="200kb", safe_serialization=False)
headers = {"Authorization": f"Bearer {self.token}"}
requests.put(
httpx.put(
f"https://huggingface.co/api/models/{self.repo_name}/settings", json={"gated": "auto"}, headers=headers
)
converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token)
@ -2368,7 +2366,7 @@ class ModelOnTheFlyConversionTester(unittest.TestCase):
@mock.patch("transformers.safetensors_conversion.spawn_conversion")
def test_absence_of_safetensors_triggers_conversion_failed(self, spawn_conversion_mock):
spawn_conversion_mock.side_effect = HTTPError()
spawn_conversion_mock.side_effect = httpx.HTTPError("failed")
config = BertConfig(
vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
@ -2388,7 +2386,6 @@ class ModelPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
@unittest.skip(reason="This test is flaky")
def test_push_to_hub(self):
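One hunk above still rewrites the manual settings call as httpx.put, while another replaces it with the dedicated client method; update_repo_settings is the huggingface_hub API for gating (signature per v1.x, hedged):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_xxx")  # placeholder token
    # Replaces a hand-rolled PUT to https://huggingface.co/api/models/{repo}/settings.
    api.update_repo_settings("user/my-model", gated="auto")  # repo id illustrative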

View File

@ -19,9 +19,8 @@ import unittest
import unittest.mock as mock
from pathlib import Path
from huggingface_hub import HfFolder
import httpx
from huggingface_hub.file_download import http_get
from requests.exceptions import HTTPError
from transformers import (
AlbertTokenizer,
@ -50,14 +49,16 @@ class TokenizerUtilTester(unittest.TestCase):
response_mock = mock.Mock()
response_mock.status_code = 500
response_mock.headers = {}
response_mock.raise_for_status.side_effect = HTTPError
response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
"failed", request=mock.Mock(), response=mock.Mock()
)
response_mock.json.return_value = {}
# Download this model to make sure it's in the cache.
_ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
# Under the mock environment we get a 500 error when trying to reach the tokenizer.
with mock.patch("requests.Session.request", return_value=response_mock) as mock_head:
with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
_ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")
# This check we did call the fake head request
mock_head.assert_called()
@ -68,14 +69,16 @@ class TokenizerUtilTester(unittest.TestCase):
response_mock = mock.Mock()
response_mock.status_code = 500
response_mock.headers = {}
response_mock.raise_for_status.side_effect = HTTPError
response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
"failed", request=mock.Mock(), response=mock.Mock()
)
response_mock.json.return_value = {}
# Download this model to make sure it's in the cache.
_ = GPT2TokenizerFast.from_pretrained("openai-community/gpt2")
# Under the mock environment we get a 500 error when trying to reach the tokenizer.
with mock.patch("requests.Session.request", return_value=response_mock) as mock_head:
with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
_ = GPT2TokenizerFast.from_pretrained("openai-community/gpt2")
# This check we did call the fake head request
mock_head.assert_called()
@ -115,7 +118,6 @@ class TokenizerPushToHubTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._token = TOKEN
HfFolder.save_token(TOKEN)
def test_push_to_hub(self):
with TemporaryHubRepo(token=self._token) as tmp_repo:

View File

@ -28,7 +28,7 @@ from pathlib import Path
from check_config_docstrings import get_checkpoint_from_config_class
from datasets import load_dataset
from get_test_info import get_model_to_tester_mapping, get_tester_classes_for_model
from huggingface_hub import Repository, create_repo, hf_api, upload_folder
from huggingface_hub import Repository, create_repo, hf_api, upload_folder # TODO: remove Repository
from transformers import (
CONFIG_MAPPING,

View File

@ -1,7 +1,7 @@
import os
import requests
from huggingface_hub import Repository, hf_hub_download
from huggingface_hub import hf_hub_download, snapshot_download
from transformers.testing_utils import _run_pipeline_tests, _run_staging
from transformers.utils.import_utils import is_mistral_common_available
@ -173,9 +173,9 @@ if __name__ == "__main__":
# But this repo. is never used in a test decorated by `is_staging_test`.
if not _run_staging:
if not os.path.isdir("tiny-random-custom-architecture"):
_ = Repository(
snapshot_download(
"hf-internal-testing/tiny-random-custom-architecture",
local_dir="tiny-random-custom-architecture",
clone_from="hf-internal-testing/tiny-random-custom-architecture",
)
# For `tests/test_tokenization_mistral_common.py:TestMistralCommonTokenizer`, which eventually calls

View File

@ -27,7 +27,7 @@ import os
import time
from create_dummy_models import COMPOSITE_MODELS, create_tiny_models
from huggingface_hub import ModelFilter, hf_api
from huggingface_hub import HfApi
import transformers
from transformers import AutoFeatureExtractor, AutoImageProcessor, AutoTokenizer
@ -65,15 +65,12 @@ def get_tiny_model_names_from_repo():
def get_tiny_model_summary_from_hub(output_path):
api = HfApi()
special_models = COMPOSITE_MODELS.values()
# All tiny model base names on Hub
model_names = get_all_model_names()
models = hf_api.list_models(
filter=ModelFilter(
author="hf-internal-testing",
)
)
models = api.list_models(author="hf-internal-testing")
_models = set()
for x in models:
model = x.id
@ -94,7 +91,7 @@ def get_tiny_model_summary_from_hub(output_path):
repo_id = f"hf-internal-testing/tiny-random-{model}"
model = model.split("-")[0]
try:
repo_info = hf_api.repo_info(repo_id)
repo_info = api.repo_info(repo_id)
content = {
"tokenizer_classes": set(),
"processor_classes": set(),