Fix hidden torchvision>=0.15 dependency issue (#39928)
* use pil_torch_interpolation_mapping for NEAREST/NEAREST_EXACT
* fix min torchvision version
* use InterpolationMode directly
* remove unused is_torchvision_greater_or_equal
* nit
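`InterpolationMode.NEAREST_EXACT` only exists in torchvision >= 0.15 (the release that also introduced the `transforms.v2` namespace), so any code that used it as an unconditional default silently required a newer torchvision than the library's declared minimum. The commit guards every such default behind `is_torchvision_v2_available()`. A minimal sketch of the pattern, assuming only that some torchvision is installed; the `hasattr` probe here is an illustration, not the transformers helper:

from torchvision.transforms import InterpolationMode

def default_nearest_mode() -> InterpolationMode:
    # NEAREST_EXACT (which matches PIL's nearest-neighbor output) was added
    # in torchvision 0.15; fall back to the legacy NEAREST before that.
    if hasattr(InterpolationMode, "NEAREST_EXACT"):
        return InterpolationMode.NEAREST_EXACT
    return InterpolationMode.NEAREST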
@@ -31,6 +31,7 @@ from .utils import (
     is_torch_available,
     is_torch_tensor,
     is_torchvision_available,
+    is_torchvision_v2_available,
     is_vision_available,
     logging,
     requires_backends,
@@ -59,7 +60,9 @@ if is_vision_available():
         from torchvision.transforms import InterpolationMode

         pil_torch_interpolation_mapping = {
-            PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT,
+            PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else InterpolationMode.NEAREST,
             PILImageResampling.BOX: InterpolationMode.BOX,
             PILImageResampling.BILINEAR: InterpolationMode.BILINEAR,
             PILImageResampling.HAMMING: InterpolationMode.HAMMING,

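With the change above, `pil_torch_interpolation_mapping` resolves PIL's NEAREST to whichever mode the installed torchvision supports. A short usage sketch, assuming torchvision is installed (the mapping is only defined in that case):

from transformers.image_utils import PILImageResampling, pil_torch_interpolation_mapping

# On torchvision >= 0.15 this is InterpolationMode.NEAREST_EXACT, which
# reproduces PIL's nearest-neighbor sampling; on older torchvision it is
# the legacy InterpolationMode.NEAREST.
mode = pil_torch_interpolation_mapping[PILImageResampling.NEAREST]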
@@ -57,6 +57,7 @@ if is_torch_available():
 if is_torchvision_v2_available():
     from torchvision.io import read_image
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.io import read_image
     from torchvision.transforms import functional as F
@@ -454,10 +455,16 @@ class ConditionalDetrImageProcessorFast(BaseImageProcessorFast):
                 The target size of the image, as returned by the preprocessing `resize` step.
             threshold (`float`, *optional*, defaults to 0.5):
                 The threshold used to binarize the segmentation masks.
-            resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
+            resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
                 The resampling filter to use when resizing the masks.
         """
-        interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
+        interpolation = (
+            interpolation
+            if interpolation is not None
+            else F.InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else F.InterpolationMode.NEAREST
+        )
         ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]

         new_annotation = {}

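The nested conditional expression above reads right to left. A spelled-out equivalent, illustrative only, with an `hasattr` probe standing in for transformers' `is_torchvision_v2_available()`:

from typing import Optional

from torchvision.transforms import functional as F

def resolve_interpolation(interpolation: Optional[F.InterpolationMode]) -> F.InterpolationMode:
    # Explicit form of the one-expression default used in the diff.
    if interpolation is not None:
        return interpolation
    if hasattr(F.InterpolationMode, "NEAREST_EXACT"):
        return F.InterpolationMode.NEAREST_EXACT
    return F.InterpolationMode.NEAREST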
@@ -48,6 +48,7 @@ if is_torch_available():
 if is_torchvision_v2_available():
     from torchvision.io import read_image
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.io import read_image
     from torchvision.transforms import functional as F
@@ -445,10 +446,16 @@ class DeformableDetrImageProcessorFast(BaseImageProcessorFast):
                 The target size of the image, as returned by the preprocessing `resize` step.
             threshold (`float`, *optional*, defaults to 0.5):
                 The threshold used to binarize the segmentation masks.
-            resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
+            resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
                 The resampling filter to use when resizing the masks.
         """
-        interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
+        interpolation = (
+            interpolation
+            if interpolation is not None
+            else F.InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else F.InterpolationMode.NEAREST
+        )
         ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]

         new_annotation = {}

@@ -70,6 +70,7 @@ if is_vision_available():
 if is_torchvision_v2_available():
     from torchvision.io import read_image
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.io import read_image
     from torchvision.transforms import functional as F
@@ -466,10 +467,16 @@ class DetrImageProcessorFast(BaseImageProcessorFast):
                 The target size of the image, as returned by the preprocessing `resize` step.
             threshold (`float`, *optional*, defaults to 0.5):
                 The threshold used to binarize the segmentation masks.
-            resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
+            resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
                 The resampling filter to use when resizing the masks.
         """
-        interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
+        interpolation = (
+            interpolation
+            if interpolation is not None
+            else F.InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else F.InterpolationMode.NEAREST
+        )
         ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]

         new_annotation = {}

@@ -33,7 +33,6 @@ from ...image_utils import (
     ImageInput,
     PILImageResampling,
     SizeDict,
-    pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
 from ...utils import (
@@ -210,7 +209,9 @@ class EomtImageProcessorFast(BaseImageProcessorFast):
                 "do_normalize": False,
                 "do_rescale": False,
                 # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                "interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
+                "interpolation": F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             }
         )

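The "nearest instead of BILINEAR" comment is worth unpacking: bilinear resampling blends neighboring class ids into fractional values that correspond to no class, while nearest-neighbor keeps the id set intact. A small self-contained demonstration, assuming torch and torchvision are installed:

import torch
from torchvision.transforms import InterpolationMode
from torchvision.transforms import functional as F

# A 1x1x4 "segmentation map" holding class ids 0 and 2.
labels = torch.tensor([[[0.0, 0.0, 2.0, 2.0]]])

nearest = F.resize(labels, [1, 8], interpolation=InterpolationMode.NEAREST)
bilinear = F.resize(labels, [1, 8], interpolation=InterpolationMode.BILINEAR)

print(nearest.unique())   # tensor([0., 2.]) -- only the real class ids survive
print(bilinear.unique())  # also contains fractional values such as 0.5 and 1.5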
@@ -51,6 +51,7 @@ if is_torch_available():
 if is_torchvision_v2_available():
     from torchvision.io import read_image
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.io import read_image
     from torchvision.transforms import functional as F
@@ -476,10 +477,16 @@ class GroundingDinoImageProcessorFast(BaseImageProcessorFast):
                 The target size of the image, as returned by the preprocessing `resize` step.
             threshold (`float`, *optional*, defaults to 0.5):
                 The threshold used to binarize the segmentation masks.
-            resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
+            resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
                 The resampling filter to use when resizing the masks.
         """
-        interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
+        interpolation = (
+            interpolation
+            if interpolation is not None
+            else F.InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else F.InterpolationMode.NEAREST
+        )
         ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]

         new_annotation = {}

@@ -62,6 +62,7 @@ if is_torch_available():

 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.transforms import functional as F

@@ -369,7 +370,9 @@ class Mask2FormerImageProcessorFast(BaseImageProcessorFast):
                 image=grouped_segmentation_maps[shape],
                 size=size,
                 size_divisor=size_divisor,
-                interpolation=F.InterpolationMode.NEAREST_EXACT,
+                interpolation=F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             )
             resized_images_grouped[shape] = stacked_images
         if segmentation_maps is not None:

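One readability note on the new call sites: a conditional expression passed as a keyword argument needs no extra parentheses, because it parses as a single expression. A tiny illustration:

# f(interpolation=A if cond else B)  ==  f(interpolation=(A if cond else B))
def f(interpolation):
    return interpolation

cond = True
assert f(interpolation="A" if cond else "B") == "A"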
@@ -66,6 +66,7 @@ if is_torch_available():

 if is_torchvision_v2_available():
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.transforms import functional as F

@@ -370,7 +371,9 @@ class MaskFormerImageProcessorFast(BaseImageProcessorFast):
                 image=grouped_segmentation_maps[shape],
                 size=size,
                 size_divisor=size_divisor,
-                interpolation=F.InterpolationMode.NEAREST_EXACT,
+                interpolation=F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             )
             resized_images_grouped[shape] = stacked_images
         if segmentation_maps is not None:

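`is_torchvision_v2_available()` is a transformers internal. A rough equivalent, shown only to make the guard concrete (the real helper may check versions differently):

def torchvision_v2_available() -> bool:
    # torchvision.transforms.v2 ships from torchvision 0.15 onwards, the same
    # release that introduced InterpolationMode.NEAREST_EXACT.
    try:
        import torchvision.transforms.v2  # noqa: F401
    except ImportError:
        return False
    return True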
@@ -31,7 +31,6 @@ from ...image_utils import (
     PILImageResampling,
     SizeDict,
     is_torch_tensor,
-    pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
 from ...utils import (
@@ -139,7 +138,9 @@ class MobileNetV2ImageProcessorFast(BaseImageProcessorFast):
                 "do_normalize": False,
                 "do_rescale": False,
                 # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                "interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
+                "interpolation": F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             }
         )

@@ -29,7 +29,6 @@ from ...image_utils import (
     PILImageResampling,
     SizeDict,
     is_torch_tensor,
-    pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
 from ...utils import (
@@ -140,7 +139,9 @@ class MobileViTImageProcessorFast(BaseImageProcessorFast):
                 "do_rescale": False,
                 "do_flip_channel_order": False,
                 # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                "interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
+                "interpolation": F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             }
         )

@@ -457,7 +457,11 @@ class OneFormerImageProcessorFast(BaseImageProcessorFast):
         for shape, stacked_segmentation_maps in grouped_segmentation_maps.items():
             if do_resize:
                 stacked_segmentation_maps = self.resize(
-                    stacked_segmentation_maps, size=size, interpolation=F.InterpolationMode.NEAREST_EXACT
+                    stacked_segmentation_maps,
+                    size=size,
+                    interpolation=F.InterpolationMode.NEAREST_EXACT
+                    if is_torchvision_v2_available()
+                    else F.InterpolationMode.NEAREST,
                 )
             processed_segmentation_maps_grouped[shape] = stacked_segmentation_maps
         processed_segmentation_maps = reorder_images(

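The surrounding OneFormer code groups segmentation maps by shape before resizing: stacking same-shape maps lets one batched resize call replace a Python loop. A minimal sketch of that idea (names illustrative; assumes torch and torchvision are installed):

import torch
from torchvision.transforms import InterpolationMode
from torchvision.transforms import functional as F

maps = [torch.zeros(1, 16, 16), torch.ones(1, 16, 16)]  # same spatial size
stacked = torch.stack(maps)  # shape (2, 1, 16, 16)
resized = F.resize(stacked, [32, 32], interpolation=InterpolationMode.NEAREST)
assert resized.shape == (2, 1, 32, 32)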
@@ -264,10 +264,16 @@ class RTDetrImageProcessorFast(BaseImageProcessorFast):
                 The target size of the image, as returned by the preprocessing `resize` step.
             threshold (`float`, *optional*, defaults to 0.5):
                 The threshold used to binarize the segmentation masks.
-            resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
+            resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
                 The resampling filter to use when resizing the masks.
         """
-        interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
+        interpolation = (
+            interpolation
+            if interpolation is not None
+            else F.InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else F.InterpolationMode.NEAREST
+        )
         ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]

         new_annotation = {}

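The context line after the new default computes the scale factors used to resize annotations. A tiny worked example of that computation:

orig_size = (480, 640)     # (height, width) before resize
target_size = (960, 1280)  # (height, width) after resize
ratio_height, ratio_width = [t / o for t, o in zip(target_size, orig_size)]
assert (ratio_height, ratio_width) == (2.0, 2.0)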
@@ -36,7 +36,6 @@ from ...image_utils import (
     ImageInput,
     PILImageResampling,
     SizeDict,
-    pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
 from ...utils import (
@@ -244,7 +243,9 @@ class SamImageProcessorFast(BaseImageProcessorFast):
             {
                 "do_normalize": False,
                 "do_rescale": False,
-                "interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
+                "interpolation": F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
                 "size": segmentation_maps_kwargs.pop("mask_size"),
                 "pad_size": segmentation_maps_kwargs.pop("mask_pad_size"),
             }

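In the SAM hunk, mask preprocessing reuses the image pipeline but switches off the photometric steps, since masks carry labels rather than pixel intensities. A hypothetical illustration of that override pattern (`segmentation_maps_kwargs` and the keys are stand-ins for the real processor kwargs):

mask_kwargs = {
    **segmentation_maps_kwargs,
    "do_normalize": False,  # labels must not be mean/std normalized
    "do_rescale": False,    # labels must not be scaled by 1/255
}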
@@ -36,7 +36,6 @@ from ...image_utils import (
     PILImageResampling,
     SizeDict,
     is_torch_tensor,
-    pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
 from ...utils import (
@@ -143,7 +142,9 @@ class SegformerImageProcessorFast(BaseImageProcessorFast):
                 "do_normalize": False,
                 "do_rescale": False,
                 # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                "interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
+                "interpolation": F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             }
         )
         processed_segmentation_maps = self._preprocess(

@@ -30,7 +30,6 @@ from ...image_utils import (
     ImageInput,
     PILImageResampling,
     SizeDict,
-    pil_torch_interpolation_mapping,
 )
 from ...processing_utils import Unpack
 from ...utils import (
@@ -100,7 +99,9 @@ class SegformerImageProcessorFast(BeitImageProcessorFast):
                 "do_normalize": False,
                 "do_rescale": False,
                 # Nearest interpolation is used for segmentation maps instead of BILINEAR.
-                "interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
+                "interpolation": F.InterpolationMode.NEAREST_EXACT
+                if is_torchvision_v2_available()
+                else F.InterpolationMode.NEAREST,
             }
         )
         processed_segmentation_maps = self._preprocess(

@@ -47,6 +47,7 @@ if is_torch_available():
 if is_torchvision_v2_available():
     from torchvision.io import read_image
     from torchvision.transforms.v2 import functional as F
+
 elif is_torchvision_available():
     from torchvision.io import read_image
     from torchvision.transforms import functional as F
@@ -493,10 +494,16 @@ class YolosImageProcessorFast(BaseImageProcessorFast):
                 The target size of the image, as returned by the preprocessing `resize` step.
             threshold (`float`, *optional*, defaults to 0.5):
                 The threshold used to binarize the segmentation masks.
-            resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
+            resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
                 The resampling filter to use when resizing the masks.
         """
-        interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
+        interpolation = (
+            interpolation
+            if interpolation is not None
+            else F.InterpolationMode.NEAREST_EXACT
+            if is_torchvision_v2_available()
+            else F.InterpolationMode.NEAREST
+        )
         ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]

         new_annotation = {}