Fix hidden torchvision>=0.15 dependency issue (#39928)

* use pil_torch_interpolation_mapping for NEAREST/NEAREST_EXACT

* fix min torchvision version

* use InterpolationMode directly

* remove unused is_torchvision_greater_or_equal

* nit
This commit is contained in:
Yoni Gozlan
2025-08-13 11:13:42 -04:00
committed by GitHub
parent 11537c3e0c
commit f445caeb0f
16 changed files with 88 additions and 28 deletions

View File

@@ -31,6 +31,7 @@ from .utils import (
is_torch_available,
is_torch_tensor,
is_torchvision_available,
is_torchvision_v2_available,
is_vision_available,
logging,
requires_backends,
@@ -59,7 +60,9 @@ if is_vision_available():
from torchvision.transforms import InterpolationMode
pil_torch_interpolation_mapping = {
PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT,
PILImageResampling.NEAREST: InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else InterpolationMode.NEAREST,
PILImageResampling.BOX: InterpolationMode.BOX,
PILImageResampling.BILINEAR: InterpolationMode.BILINEAR,
PILImageResampling.HAMMING: InterpolationMode.HAMMING,

View File

@@ -57,6 +57,7 @@ if is_torch_available():
if is_torchvision_v2_available():
from torchvision.io import read_image
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.io import read_image
from torchvision.transforms import functional as F
@@ -454,10 +455,16 @@ class ConditionalDetrImageProcessorFast(BaseImageProcessorFast):
The target size of the image, as returned by the preprocessing `resize` step.
threshold (`float`, *optional*, defaults to 0.5):
The threshold used to binarize the segmentation masks.
resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
The resampling filter to use when resizing the masks.
"""
interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
interpolation = (
interpolation
if interpolation is not None
else F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST
)
ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]
new_annotation = {}

View File

@@ -48,6 +48,7 @@ if is_torch_available():
if is_torchvision_v2_available():
from torchvision.io import read_image
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.io import read_image
from torchvision.transforms import functional as F
@@ -445,10 +446,16 @@ class DeformableDetrImageProcessorFast(BaseImageProcessorFast):
The target size of the image, as returned by the preprocessing `resize` step.
threshold (`float`, *optional*, defaults to 0.5):
The threshold used to binarize the segmentation masks.
resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
The resampling filter to use when resizing the masks.
"""
interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
interpolation = (
interpolation
if interpolation is not None
else F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST
)
ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]
new_annotation = {}

View File

@@ -70,6 +70,7 @@ if is_vision_available():
if is_torchvision_v2_available():
from torchvision.io import read_image
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.io import read_image
from torchvision.transforms import functional as F
@@ -466,10 +467,16 @@ class DetrImageProcessorFast(BaseImageProcessorFast):
The target size of the image, as returned by the preprocessing `resize` step.
threshold (`float`, *optional*, defaults to 0.5):
The threshold used to binarize the segmentation masks.
resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
The resampling filter to use when resizing the masks.
"""
interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
interpolation = (
interpolation
if interpolation is not None
else F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST
)
ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]
new_annotation = {}

View File

@@ -33,7 +33,6 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
SizeDict,
pil_torch_interpolation_mapping,
)
from ...processing_utils import Unpack
from ...utils import (
@@ -210,7 +209,9 @@ class EomtImageProcessorFast(BaseImageProcessorFast):
"do_normalize": False,
"do_rescale": False,
# Nearest interpolation is used for segmentation maps instead of BILINEAR.
"interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
"interpolation": F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
}
)

View File

@@ -51,6 +51,7 @@ if is_torch_available():
if is_torchvision_v2_available():
from torchvision.io import read_image
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.io import read_image
from torchvision.transforms import functional as F
@@ -476,10 +477,16 @@ class GroundingDinoImageProcessorFast(BaseImageProcessorFast):
The target size of the image, as returned by the preprocessing `resize` step.
threshold (`float`, *optional*, defaults to 0.5):
The threshold used to binarize the segmentation masks.
resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
The resampling filter to use when resizing the masks.
"""
interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
interpolation = (
interpolation
if interpolation is not None
else F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST
)
ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]
new_annotation = {}

View File

@@ -62,6 +62,7 @@ if is_torch_available():
if is_torchvision_v2_available():
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.transforms import functional as F
@@ -369,7 +370,9 @@ class Mask2FormerImageProcessorFast(BaseImageProcessorFast):
image=grouped_segmentation_maps[shape],
size=size,
size_divisor=size_divisor,
interpolation=F.InterpolationMode.NEAREST_EXACT,
interpolation=F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
)
resized_images_grouped[shape] = stacked_images
if segmentation_maps is not None:

View File

@@ -66,6 +66,7 @@ if is_torch_available():
if is_torchvision_v2_available():
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.transforms import functional as F
@@ -370,7 +371,9 @@ class MaskFormerImageProcessorFast(BaseImageProcessorFast):
image=grouped_segmentation_maps[shape],
size=size,
size_divisor=size_divisor,
interpolation=F.InterpolationMode.NEAREST_EXACT,
interpolation=F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
)
resized_images_grouped[shape] = stacked_images
if segmentation_maps is not None:

View File

@@ -31,7 +31,6 @@ from ...image_utils import (
PILImageResampling,
SizeDict,
is_torch_tensor,
pil_torch_interpolation_mapping,
)
from ...processing_utils import Unpack
from ...utils import (
@@ -139,7 +138,9 @@ class MobileNetV2ImageProcessorFast(BaseImageProcessorFast):
"do_normalize": False,
"do_rescale": False,
# Nearest interpolation is used for segmentation maps instead of BILINEAR.
"interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
"interpolation": F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
}
)

View File

@@ -29,7 +29,6 @@ from ...image_utils import (
PILImageResampling,
SizeDict,
is_torch_tensor,
pil_torch_interpolation_mapping,
)
from ...processing_utils import Unpack
from ...utils import (
@@ -140,7 +139,9 @@ class MobileViTImageProcessorFast(BaseImageProcessorFast):
"do_rescale": False,
"do_flip_channel_order": False,
# Nearest interpolation is used for segmentation maps instead of BILINEAR.
"interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
"interpolation": F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
}
)

View File

@@ -457,7 +457,11 @@ class OneFormerImageProcessorFast(BaseImageProcessorFast):
for shape, stacked_segmentation_maps in grouped_segmentation_maps.items():
if do_resize:
stacked_segmentation_maps = self.resize(
stacked_segmentation_maps, size=size, interpolation=F.InterpolationMode.NEAREST_EXACT
stacked_segmentation_maps,
size=size,
interpolation=F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
)
processed_segmentation_maps_grouped[shape] = stacked_segmentation_maps
processed_segmentation_maps = reorder_images(

View File

@@ -264,10 +264,16 @@ class RTDetrImageProcessorFast(BaseImageProcessorFast):
The target size of the image, as returned by the preprocessing `resize` step.
threshold (`float`, *optional*, defaults to 0.5):
The threshold used to binarize the segmentation masks.
resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
The resampling filter to use when resizing the masks.
"""
interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
interpolation = (
interpolation
if interpolation is not None
else F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST
)
ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]
new_annotation = {}

View File

@@ -36,7 +36,6 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
SizeDict,
pil_torch_interpolation_mapping,
)
from ...processing_utils import Unpack
from ...utils import (
@@ -244,7 +243,9 @@ class SamImageProcessorFast(BaseImageProcessorFast):
{
"do_normalize": False,
"do_rescale": False,
"interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
"interpolation": F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
"size": segmentation_maps_kwargs.pop("mask_size"),
"pad_size": segmentation_maps_kwargs.pop("mask_pad_size"),
}

View File

@@ -36,7 +36,6 @@ from ...image_utils import (
PILImageResampling,
SizeDict,
is_torch_tensor,
pil_torch_interpolation_mapping,
)
from ...processing_utils import Unpack
from ...utils import (
@@ -143,7 +142,9 @@ class SegformerImageProcessorFast(BaseImageProcessorFast):
"do_normalize": False,
"do_rescale": False,
# Nearest interpolation is used for segmentation maps instead of BILINEAR.
"interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
"interpolation": F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
}
)
processed_segmentation_maps = self._preprocess(

View File

@@ -30,7 +30,6 @@ from ...image_utils import (
ImageInput,
PILImageResampling,
SizeDict,
pil_torch_interpolation_mapping,
)
from ...processing_utils import Unpack
from ...utils import (
@@ -100,7 +99,9 @@ class SegformerImageProcessorFast(BeitImageProcessorFast):
"do_normalize": False,
"do_rescale": False,
# Nearest interpolation is used for segmentation maps instead of BILINEAR.
"interpolation": pil_torch_interpolation_mapping[PILImageResampling.NEAREST],
"interpolation": F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST,
}
)
processed_segmentation_maps = self._preprocess(

View File

@@ -47,6 +47,7 @@ if is_torch_available():
if is_torchvision_v2_available():
from torchvision.io import read_image
from torchvision.transforms.v2 import functional as F
elif is_torchvision_available():
from torchvision.io import read_image
from torchvision.transforms import functional as F
@@ -493,10 +494,16 @@ class YolosImageProcessorFast(BaseImageProcessorFast):
The target size of the image, as returned by the preprocessing `resize` step.
threshold (`float`, *optional*, defaults to 0.5):
The threshold used to binarize the segmentation masks.
resample (`InterpolationMode`, defaults to `InterpolationMode.NEAREST_EXACT`):
resample (`InterpolationMode`, defaults to `F.InterpolationMode.NEAREST_EXACT`):
The resampling filter to use when resizing the masks.
"""
interpolation = interpolation if interpolation is not None else F.InterpolationMode.NEAREST_EXACT
interpolation = (
interpolation
if interpolation is not None
else F.InterpolationMode.NEAREST_EXACT
if is_torchvision_v2_available()
else F.InterpolationMode.NEAREST
)
ratio_height, ratio_width = [target / orig for target, orig in zip(target_size, orig_size)]
new_annotation = {}