Compare commits


3 Commits

SHA1       Message                                   Date
29845c2460 fix eetq                                  2024-11-21 23:15:27 +00:00
4a7158d05c skip eetq tests loading shard_checkpoint  2024-11-21 23:00:21 +00:00
b2e08a8466 New awq version                           2024-11-21 22:34:01 +00:00
13 changed files with 48 additions and 489 deletions

View File

@@ -1,287 +0,0 @@
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
# This file was automatically generated from examples/modular-transformers/modular_new_imgproc_model.py.
# Do NOT edit this file manually as any edits will be overwritten by the generation of
# the file from the modular. If any change should be done, please apply the change to the
# modular_new_imgproc_model.py file directly. One of our CI checks enforces this.
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
from typing import Dict, List, Optional, Union
import numpy as np
import torch
from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
from ...image_transforms import convert_to_rgb, resize, to_channel_dimension_format
from ...image_utils import (
OPENAI_CLIP_MEAN,
OPENAI_CLIP_STD,
ChannelDimension,
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
validate_preprocess_arguments,
)
from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging
if is_vision_available():
import PIL
logger = logging.get_logger(__name__)
class ImgprocModelImageProcessor(BaseImageProcessor):
r"""
Constructs a NEW_IMGPROC_MODEL image processor.
Args:
do_resize (`bool`, *optional*, defaults to `True`):
Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
`do_resize` parameter in the `preprocess` method.
size (`dict`, *optional*, defaults to `{"height": 384, "width": 384}`):
Size of the output image after resizing. Can be overridden by the `size` parameter in the `preprocess`
method.
resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`. Can be
overridden by the `resample` parameter in the `preprocess` method.
do_rescale (`bool`, *optional*, defaults to `True`):
Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the
`do_rescale` parameter in the `preprocess` method.
rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
overridden by the `rescale_factor` parameter in the `preprocess` method.
do_normalize (`bool`, *optional*, defaults to `True`):
Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
method.
image_mean (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_MEAN`):
Mean to use if normalizing the image. This is a float or list of floats the length of the number of
channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
image_std (`float` or `List[float]`, *optional*, defaults to `OPENAI_CLIP_STD`):
Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
do_convert_rgb (`bool`, *optional*, defaults to `True`):
Whether to convert the image to RGB.
"""
model_input_names = ["pixel_values"]
def __init__(
self,
do_resize: bool = True,
size: Optional[Dict[str, int]] = None,
resample: PILImageResampling = PILImageResampling.BICUBIC,
do_rescale: bool = True,
rescale_factor: Union[int, float] = 1 / 255,
do_normalize: bool = True,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
do_convert_rgb: bool = True,
**kwargs,
) -> None:
super().__init__(**kwargs)
size = size if size is not None else {"height": 384, "width": 384}
size = get_size_dict(size, default_to_square=True)
self.do_resize = do_resize
self.size = size
self.resample = resample
self.do_rescale = do_rescale
self.rescale_factor = rescale_factor
self.do_normalize = do_normalize
self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
self.do_convert_rgb = do_convert_rgb
def resize(
self,
image: np.ndarray,
size: Dict[str, int],
resample: PILImageResampling = PILImageResampling.BICUBIC,
data_format: Optional[Union[str, ChannelDimension]] = None,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
**kwargs,
) -> np.ndarray:
"""
Resize an image to `(size["height"], size["width"])`.
Args:
image (`np.ndarray`):
Image to resize.
size (`Dict[str, int]`):
Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
`PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BICUBIC`.
data_format (`ChannelDimension` or `str`, *optional*):
The channel dimension format for the output image. If unset, the channel dimension format of the input
image is used. Can be one of:
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
- `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
input_data_format (`ChannelDimension` or `str`, *optional*):
The channel dimension format for the input image. If unset, the channel dimension format is inferred
from the input image. Can be one of:
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
- `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
Returns:
`np.ndarray`: The resized image.
"""
size = get_size_dict(size)
if "height" not in size or "width" not in size:
raise ValueError(f"The `size` dictionary must contain the keys `height` and `width`. Got {size.keys()}")
output_size = (size["height"], size["width"])
return resize(
image,
size=output_size,
resample=resample,
data_format=data_format,
input_data_format=input_data_format,
**kwargs,
)
@filter_out_non_signature_kwargs()
def preprocess(
self,
images: ImageInput,
do_resize: Optional[bool] = None,
size: Optional[Dict[str, int]] = None,
resample: Optional[PILImageResampling] = None,
do_rescale: Optional[bool] = None,
rescale_factor: Optional[float] = None,
do_normalize: Optional[bool] = None,
image_mean: Optional[Union[float, List[float]]] = None,
image_std: Optional[Union[float, List[float]]] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
do_convert_rgb: Optional[bool] = None,
data_format: ChannelDimension = ChannelDimension.FIRST,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> BatchFeature:
"""
Preprocess an image or batch of images.
Args:
images (`ImageInput`):
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
Controls the size of the image after `resize`. The image is resized to `(size["height"], size["width"])`.
resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`.
do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
Whether to rescale the image values to the [0, 1] range.
rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
Rescale factor to rescale the image by if `do_rescale` is set to `True`.
do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
Whether to normalize the image.
image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
Image mean to normalize the image by if `do_normalize` is set to `True`.
image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
Image standard deviation to normalize the image by if `do_normalize` is set to `True`.
do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
Whether to convert the image to RGB.
return_tensors (`str` or `TensorType`, *optional*):
The type of tensors to return. Can be one of:
- Unset: Return a list of `np.ndarray`.
- `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
- `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
- `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
- `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
The channel dimension format for the output image. Can be one of:
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
- Unset: Use the channel dimension format of the input image.
input_data_format (`ChannelDimension` or `str`, *optional*):
The channel dimension format for the input image. If unset, the channel dimension format is inferred
from the input image. Can be one of:
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
- `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
"""
do_resize = do_resize if do_resize is not None else self.do_resize
resample = resample if resample is not None else self.resample
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor
do_normalize = do_normalize if do_normalize is not None else self.do_normalize
image_mean = image_mean if image_mean is not None else self.image_mean
image_std = image_std if image_std is not None else self.image_std
do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
size = size if size is not None else self.size
size = get_size_dict(size, default_to_square=False)
images = make_list_of_images(images)
if not valid_images(images):
raise ValueError(
"Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
"torch.Tensor, tf.Tensor or jax.ndarray."
)
validate_preprocess_arguments(
do_rescale=do_rescale,
rescale_factor=rescale_factor,
do_normalize=do_normalize,
image_mean=image_mean,
image_std=image_std,
do_resize=do_resize,
size=size,
resample=resample,
)
# PIL RGBA images are converted to RGB
if do_convert_rgb:
images = [convert_to_rgb(image) for image in images]
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]
if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
if do_resize:
images = [
self.resize(image=image, size=size, resample=resample, input_data_format=input_data_format)
for image in images
]
if do_rescale:
images = [
self.rescale(image=image, scale=rescale_factor, input_data_format=input_data_format)
for image in images
]
if do_normalize:
images = [
self.normalize(image=image, mean=image_mean, std=image_std, input_data_format=input_data_format)
for image in images
]
images = [
to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format) for image in images
]
encoded_outputs = BatchFeature(data={"pixel_values": images}, tensor_type=return_tensors)
return encoded_outputs
def new_image_processing_method(self, pixel_values: torch.FloatTensor):
return pixel_values / 2
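
A minimal usage sketch of the processor above (the class is deleted in this compare, so this reflects its pre-deletion behaviour; the input array is made up for illustration):

import numpy as np

processor = ImgprocModelImageProcessor()
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # HWC uint8 image
batch = processor.preprocess(image, return_tensors="pt")
print(batch["pixel_values"].shape)  # torch.Size([1, 3, 384, 384]) after resize/rescale/normalize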

View File

@@ -1,9 +0,0 @@
import torch
import torch.utils.checkpoint
from transformers.models.blip.image_processing_blip import BlipImageProcessor
class ImgprocModelImageProcessor(BlipImageProcessor):
def new_image_processing_method(self, pixel_values: torch.FloatTensor):
return pixel_values / 2

View File

@@ -15,7 +15,6 @@
# limitations under the License.
import copy
import inspect
import os
import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
@@ -117,41 +116,6 @@ if is_accelerate_available():
from accelerate.hooks import AlignDevicesHook, add_hook_to_module
o = dict()
import queue
q = queue.Queue()
p = queue.Queue()
def model_forward_2(model, *args, **kwargs):
with torch.no_grad():
return model.forward(*args, **kwargs)
my_model = None
def foo():
while True:
item = q.get()
o, model, model_inputs, put_output = item
if o['model_forward'] is None:
#if isinstance(model_kwargs.get("past_key_values"), StaticCache):
if model.device.type == "cuda":
logger.warning_once("Using `torch.compile`.")
os.environ["TOKENIZERS_PARALLELISM"] = "0"
model_forward_3 = torch.compile(model_forward_2, mode="reduce-overhead", fullgraph=True)
outputs = model_forward_3(my_model, return_dict=True, **model_inputs)
o['model_forward'] = model_forward_3
else:
outputs = o['model_forward'](my_model, return_dict=True, **model_inputs)
o['outputs'] = outputs
# only put if necessary!
if put_output:
p.put(o)
@dataclass
class GenerateDecoderOnlyOutput(ModelOutput):
"""
@@ -1065,6 +1029,10 @@ class GenerationMixin:
"You have explicitly specified `forced_decoder_ids`. Please remove the `forced_decoder_ids` argument "
"in favour of `input_ids` or `decoder_input_ids` respectively.",
)
if generation_config.watermarking_config is not None:
processors.append(
generation_config.watermarking_config.construct_processor(self.config.vocab_size, device)
)
# TODO (joao): find a strategy to specify the order of the processors
processors = self._merge_criteria_processor_list(processors, logits_processor)
@@ -1117,12 +1085,6 @@ class GenerationMixin:
)
)
# Watermarking should be after all logits processing is finished (see #34630)
if generation_config.watermarking_config is not None:
processors.append(
generation_config.watermarking_config.construct_processor(self.config.vocab_size, device)
)
# `LogitNormalization` should always be the last logit processor, when present
if generation_config.renormalize_logits is True:
processors.append(LogitNormalization())
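
A hedged sketch (illustrative processors and values, not the actual generation code) of the ordering contract the two comments above describe:

from transformers.generation.logits_process import (
    LogitNormalization,
    LogitsProcessorList,
    RepetitionPenaltyLogitsProcessor,
    TemperatureLogitsWarper,
)

# Content-shaping processors first, the watermark processor after all of them
# (see #34630), and LogitNormalization always last so downstream code can treat
# the returned scores as log-probabilities.
processors = LogitsProcessorList([
    RepetitionPenaltyLogitsProcessor(penalty=1.2),
    TemperatureLogitsWarper(0.7),
    # watermarking_config.construct_processor(vocab_size, device) would go here
    LogitNormalization(),
])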
@@ -3260,19 +3222,6 @@ class GenerationMixin:
unfinished_sequences = torch.ones(batch_size, dtype=torch.long, device=input_ids.device)
model_kwargs = self._get_initial_cache_position(input_ids, model_kwargs)
#o = dict()
if 'model_forward' not in o:
o['model_forward'] = None
# q.task_done()
import threading
t = threading.Thread(target=foo)
t.start()
i = 0
while self._has_unfinished_sequences(
this_peer_finished, synced_gpus, device=input_ids.device, cur_len=cur_len, max_length=max_length
):
@@ -3283,41 +3232,8 @@ class GenerationMixin:
model_inputs.update({"output_attentions": output_attentions} if output_attentions else {})
model_inputs.update({"output_hidden_states": output_hidden_states} if output_hidden_states else {})
if i == 0:
already_compiled = False
if o['model_forward'] is not None:
already_compiled = True
outputs = self(**model_inputs, return_dict=True)
i += 1
else:
if not already_compiled:
q.put((o, self, model_inputs, False))
# use self
outputs = self(**model_inputs, return_dict=True)
else:
# directly call (??)
# outputs = o['model_forward'](self, return_dict=True, **model_inputs)
q.put((o, self, model_inputs, True))
item = p.get()
outputs = item['outputs']
# if i == 1 and o['model_forward'] is None:
# # don't join
# # just compile
# q.put((o, self, model_inputs))
# # when compiled is done
# if o['model_forward'] is not None:
# import datetime
# s = datetime.datetime.now()
# q.put((o, self, model_inputs))
# item = p.get()
# outputs = item['outputs']
# d = (datetime.datetime.now() - s).total_seconds()
# # print(d)
# else:
# outputs = self(**model_inputs, return_dict=True)
i += 1
# forward pass to get next token
outputs = self(**model_inputs, return_dict=True)
# synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping
model_kwargs = self._update_model_kwargs_for_generation(

View File

@@ -52,6 +52,7 @@ from .pytorch_utils import ( # noqa: F401
find_pruneable_heads_and_indices,
id_tensor_storage,
is_torch_greater_or_equal_than_1_13,
is_torch_greater_or_equal_than_2_4,
prune_conv1d_layer,
prune_layer,
prune_linear_layer,
@@ -89,7 +90,6 @@ from .utils import (
is_peft_available,
is_remote_url,
is_safetensors_available,
is_torch_greater_or_equal,
is_torch_sdpa_available,
is_torch_xla_available,
logging,
@@ -5032,7 +5032,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMixin
device_mesh (`torch.distributed.DeviceMesh`):
The device mesh to use for tensor parallelism.
"""
if not is_torch_greater_or_equal("2.5"):
if not is_torch_greater_or_equal_than_2_4:
raise EnvironmentError("tensor parallel is only supported for `torch>=2.5`.")
# Tensor parallelize a nn.Module based on the `_tp_plan` attribute of the module.

View File

@@ -38,7 +38,6 @@ from ...utils import TensorType, is_vision_available, logging
logger = logging.get_logger(__name__)
MAX_IMAGE_SIZE = 4096 # 4k resolution as absolute maximum
if is_vision_available():
@@ -117,6 +116,7 @@ def _resize_output_size_scale_below_upper_bound(
def get_resize_output_image_size(
image,
resolution_max_side: int,
max_image_size: int = 1820,
input_data_format: Optional[Union[str, ChannelDimension]] = None,
) -> Tuple[int, int]:
"""
@@ -126,18 +126,24 @@ def get_resize_output_image_size(
Image to resize.
resolution_max_side (`int`):
The longest edge of the image will be resized to this value. The shortest edge will be resized to keep the
input aspect ratio.
input aspect ratio, with a lower bound of `min_image_size`.
max_image_size (`int`, *optional*, defaults to 1820):
Maximum image resolution. If the image is larger than this size, the longest edge will be resized to this
value, with the shortest edge resized to keep the input aspect ratio, with a lower bound of `min_image_size`.
input_data_format (`ChannelDimension` or `str`):
The channel dimension format of the input image.
Returns:
The output size of the image after resizing.
"""
if resolution_max_side > max_image_size:
raise ValueError("`resolution_max_side` cannot be larger than `max_image_size`")
height, width = get_image_size(image, channel_dim=input_data_format)
# Find the output size, when rescaling the longest edge to max_len and preserving the aspect ratio
height, width = _resize_output_size_rescale_to_max_len(height, width, max_len=resolution_max_side)
# Find the output size when scaling the image to be below the MAX_IMAGE_SIZE
height, width = _resize_output_size_scale_below_upper_bound(height, width, max_len=MAX_IMAGE_SIZE)
# Find the output size when scaling the image to be below the max_image_size
height, width = _resize_output_size_scale_below_upper_bound(height, width, max_len=max_image_size)
return height, width
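
A hedged re-implementation (a sketch with made-up dimensions, not the library code) of the two-step sizing above, to make the interaction of the two bounds concrete:

def sketch_output_size(height, width, resolution_max_side, max_image_size=1820):
    # mirrors the new ValueError above: the requested edge may not exceed the cap
    assert resolution_max_side <= max_image_size
    scale = resolution_max_side / max(height, width)  # rescale the longest edge
    height, width = round(height * scale), round(width * scale)
    if max(height, width) > max_image_size:  # then cap below max_image_size
        cap = max_image_size / max(height, width)
        height, width = round(height * cap), round(width * cap)
    return height, width

print(sketch_output_size(3000, 1500, resolution_max_side=1820))  # (1820, 910)

Note the real helpers also enforce a `min_image_size` lower bound, omitted in this sketch.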
@@ -245,7 +251,7 @@ def convert_to_rgb(
data_format = input_data_format if data_format is None else data_format
mode = "P" if palette is not None else None
image = to_pil_image(image, image_mode=mode, input_data_format=input_data_format)
image = to_pil_image(image, image_mode=mode)
if image.mode == "P" and palette is not None:
image.putpalette(palette)
@@ -398,7 +404,7 @@ class Idefics3ImageProcessor(BaseImageProcessor):
image_mode = None
if image.ndim == 2 or image.shape[-1] == 1:
image_mode = "P"
image = to_pil_image(image, image_mode=image_mode, input_data_format=input_data_format)
image = to_pil_image(image, image_mode=image_mode)
resized_image = image.resize((size[1], size[0]), resample=resample)
resized_image = np.array(resized_image)
@@ -748,16 +754,6 @@ class Idefics3ImageProcessor(BaseImageProcessor):
# All transformations expect numpy arrays.
images_list = [[to_numpy_array(image) for image in images] for images in images_list]
# Extra channel dimension for grayscale images
if input_data_format in [ChannelDimension.LAST, None]:
images_list = [
[np.expand_dims(img, axis=-1) if img.ndim == 2 else img for img in images] for images in images_list
]
elif input_data_format == ChannelDimension.FIRST:
images_list = [
[np.expand_dims(img, axis=0) if img.ndim == 2 else img for img in images] for images in images_list
]
if is_scaled_image(images_list[0][0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
@@ -768,6 +764,18 @@ class Idefics3ImageProcessor(BaseImageProcessor):
if input_data_format is None:
input_data_format = infer_channel_dimension_format(images_list[0][0], num_channels=(1, 3, 4))
# Extra channel dimension for grayscale images
if input_data_format == ChannelDimension.LAST:
images_list = [
[np.expand_dims(img, axis=-1) if img.ndim == 2 else img for img in images] for images in images_list
]
elif input_data_format == ChannelDimension.FIRST:
images_list = [
[np.expand_dims(img, axis=0) if img.ndim == 2 else img for img in images] for images in images_list
]
else:
raise ValueError(f"Invalid channel dimension format {input_data_format}.")
if do_resize:
images_list = [
[
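
A minimal sketch (hypothetical helper name) of why the reordering above matters: the grayscale expansion has to branch on the channel-dimension format, so the format must be inferred first rather than guessed when `input_data_format` is `None`:

import numpy as np

def expand_grayscale(img: np.ndarray, input_data_format: str) -> np.ndarray:
    if img.ndim != 2:
        return img  # already has a channel axis
    axis = -1 if input_data_format == "channels_last" else 0
    return np.expand_dims(img, axis=axis)

gray = np.zeros((28, 28), dtype=np.uint8)
print(expand_grayscale(gray, "channels_last").shape)   # (28, 28, 1)
print(expand_grayscale(gray, "channels_first").shape)  # (1, 28, 28)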

View File

@@ -21,7 +21,7 @@ from packaging import version
from safetensors.torch import storage_ptr, storage_size
from torch import nn
from .utils import is_torch_greater_or_equal, is_torch_xla_available, logging
from .utils import is_torch_xla_available, logging
ALL_LAYERNORM_LAYERS = [nn.LayerNorm]
@@ -39,7 +39,7 @@ is_torch_greater_or_equal_than_1_13 = parsed_torch_version_base >= version.parse("1.13")
is_torch_greater_or_equal_than_1_12 = parsed_torch_version_base >= version.parse("1.12")
if is_torch_greater_or_equal("2.5"):
if is_torch_greater_or_equal_than_2_4:
from torch.distributed.tensor import Replicate
from torch.distributed.tensor.parallel import (
ColwiseParallel,

View File

@@ -215,9 +215,6 @@ class HfQuantizer(ABC):
# Delete quantizer and quantization config
del model.hf_quantizer
del model.config.quantization_config
del model.config._pre_quantization_dtype
model.is_quantized = False
return model

View File

@@ -53,20 +53,6 @@ class EetqHfQuantizer(HfQuantizer):
"Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQ"
)
try:
import eetq # noqa: F401
except ImportError as exc:
if "shard_checkpoint" in str(exc):
# EETQ 1.0.0 is currently broken with the latest transformers because it tries to import the removed
# shard_checkpoint function, see https://github.com/NetEase-FuXi/EETQ/issues/34.
# TODO: Update message once eetq releases a fix
raise ImportError(
"You are using a version of EETQ that is incompatible with the current transformers version. "
"Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0."
) from exc
else:
raise
if not is_accelerate_available():
raise ImportError("Loading an EETQ quantized model requires accelerate (`pip install accelerate`)")

View File

@@ -1143,17 +1143,7 @@ def require_eetq(test_case):
"""
Decorator marking a test that requires eetq
"""
eetq_available = is_eetq_available()
if eetq_available:
try:
import eetq # noqa: F401
except ImportError as exc:
if "shard_checkpoint" in str(exc):
# EETQ 1.0.0 is currently broken with the latest transformers because it tries to import the removed
# shard_checkpoint function, see https://github.com/NetEase-FuXi/EETQ/issues/34.
# TODO: Remove once eetq releases a fix and this release is used in CI
eetq_available = False
return unittest.skipUnless(eetq_available, "test requires eetq")(test_case)
return unittest.skipUnless(is_eetq_available(), "test requires eetq")(test_case)
def require_av(test_case):

View File

@@ -1006,6 +1006,17 @@ def is_auto_gptq_available():
def is_eetq_available():
if not _eetq_available:
return _eetq_available
try:
from eetq import EetqLinear # noqa: F401
except ImportError as exc:
if "shard_checkpoint" in str(exc):
# eetq is currently broken with newer transformers versions because it tries to import shard_checkpoint
# see https://github.com/NetEase-FuXi/EETQ/issues/34
# TODO: Remove once eetq releases a fix and this release is used in CI
return False
return _eetq_available
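
A hedged sketch (hypothetical helper) of the pattern the three eetq hunks converge on: probe the known breakage once inside the availability check, so callers such as the quantizer and the test decorator can gate on a single boolean instead of repeating the try/except:

def sketch_is_eetq_available(cached_flag: bool) -> bool:
    if not cached_flag:
        return False
    try:
        import eetq  # noqa: F401
    except ImportError as exc:
        # EETQ 1.0.0 imports the removed shard_checkpoint helper from
        # transformers (NetEase-FuXi/EETQ#34); treat that failure as unavailable
        if "shard_checkpoint" in str(exc):
            return False
        raise
    return True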

View File

@@ -14,7 +14,6 @@
# limitations under the License.
import collections
import copy
import gc
import inspect
@@ -2451,58 +2450,6 @@ class UtilsFunctionsTest(unittest.TestCase):
self.assertTrue(n_matches.item() == 2)
self.assertTrue(validated_tokens.tolist()[0] == [1, 4, 8])
def test_speculative_sampling_target_distribution(self):
"""
Asserts that the target distribution is preserved.
Should help with catching issues like #32867.
"""
# assume vocab size 10, input length 5 + 3 generated candidates
candidate_input_ids = torch.tensor([[8, 0, 3, 9, 8, 1, 4, 5]]) # input tokens
candidate_logits = torch.tensor(
[
[
[-10.0, 10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0], # generated 1
[-10.0, -10.0, -10.0, -10.0, 10.0, -10.0, -10.0, -10.0, -10.0, -10.0], # generated 4
[-10.0, -10.0, -10.0, -10.0, -10.0, 10.0, -10.0, -10.0, -10.0, -10.0], # generated 5
]
]
)
candidate_length = 3
inf = float("inf")
new_logits = torch.tensor(
[
[
# accepts 1:
[-inf, 10.0, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
# accepts 4:
[-inf, -inf, -inf, -inf, 10.0, -inf, -inf, -inf, -inf, -inf],
# most likely to be 1 or 8, less likely to be 3, then 7, and should never be any other value:
[-inf, 2.0, -inf, 1.0, -inf, -inf, -inf, -0.01, 2.0, -inf],
# N/A:
[-inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf],
]
]
)
last_assistant_token_is_eos = False
last_validated_token = []
for _ in range(10_000):
validated_tokens, n_matches = _speculative_sampling(
candidate_input_ids,
candidate_logits,
candidate_length,
new_logits,
last_assistant_token_is_eos,
)
self.assertTrue(n_matches.item() == 2)
self.assertTrue(validated_tokens.tolist()[0][0] == 1)
self.assertTrue(validated_tokens.tolist()[0][1] == 4)
self.assertTrue(validated_tokens.tolist()[0][2] in [1, 3, 7, 8])
last_validated_token.append(validated_tokens.tolist()[0][2])
# check that the most likely tokens are selected more often than the less likely ones
last_token_counts = collections.Counter(last_validated_token)
self.assertTrue(last_token_counts[1] > last_token_counts[3] > last_token_counts[7] > 0)
self.assertTrue(last_token_counts[8] > last_token_counts[3])
@pytest.mark.generate
@require_torch
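
A hedged sketch of the acceptance rule the deleted distribution test exercises (textbook speculative sampling, not necessarily the exact `_speculative_sampling` internals): accept the drafted token with probability min(1, p_target/p_draft), otherwise resample from the normalised residual max(0, p_target - p_draft). This correction is what makes outputs follow the target distribution exactly, the property the 10_000-iteration counter check above asserts.

import torch

def accept_or_resample(p_target: torch.Tensor, p_draft: torch.Tensor, drafted: int) -> int:
    # accept the drafted token with probability min(1, p_target / p_draft)
    if torch.rand(()) < torch.clamp(p_target[drafted] / p_draft[drafted], max=1.0):
        return drafted
    # otherwise resample from the normalised residual distribution
    residual = torch.clamp(p_target - p_draft, min=0.0)
    return int(torch.multinomial(residual / residual.sum(), 1))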

View File

@@ -65,7 +65,7 @@ class BitNetTest(unittest.TestCase):
"""
Load the model
"""
cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_name)
cls.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
cls.quantized_model = AutoModelForCausalLM.from_pretrained(cls.model_name, device_map=cls.device)
def tearDown(self):

View File

@@ -1192,7 +1192,7 @@ class ModularFileMapper(ModuleMapper):
self.visited_modules = {}
self.renamers = {}
for file, module in self.model_specific_modules.items():
file_model_name = file.split(".")[-2]
file_model_name = re.search(r"models\.\w*?\.\w*?_(\S*)", file).groups()[0]
renamer = ReplaceNameTransformer(
file_model_name, self.model_name, self.given_old_name, self.given_new_name
)
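
An illustration (made-up module paths) of what the new lookup extracts: the model name now comes from the file stem after its first underscore, instead of from a fixed dotted position:

import re

for module in (
    "transformers.models.llama.modeling_llama",
    "transformers.models.gemma2.configuration_gemma2",
):
    print(re.search(r"models\.\w*?\.\w*?_(\S*)", module).groups()[0])
# -> llama, gemma2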