Fix local repos with remote code not registering for pipelines (#33100)

* Extremely experimental fix!

* Try removing the clause entirely

* Add test

* make fixup

* stash commit

* Remove breakpoint

* Add anti-regression test

* make fixup

* Move repos to hf-internal-testing!
Author: Matt
Date: 2024-08-30 16:56:22 +01:00 (committed by GitHub)
Commit: 38d58a4427 (parent: fbff27623a)
3 changed files with 21 additions and 10 deletions


@@ -17,7 +17,6 @@
 import copy
 import importlib
 import json
-import os
 import warnings
 from collections import OrderedDict
 
@@ -427,10 +426,7 @@ class _BaseAutoModelClass:
             else:
                 repo_id = config.name_or_path
             model_class = get_class_from_dynamic_module(class_ref, repo_id, **kwargs)
-            if os.path.isdir(config._name_or_path):
-                model_class.register_for_auto_class(cls.__name__)
-            else:
-                cls.register(config.__class__, model_class, exist_ok=True)
+            cls.register(config.__class__, model_class, exist_ok=True)
             _ = kwargs.pop("code_revision", None)
             return model_class._from_config(config, **kwargs)
         elif type(config) in cls._model_mapping.keys():
@@ -552,10 +548,7 @@ class _BaseAutoModelClass:
                 class_ref, pretrained_model_name_or_path, code_revision=code_revision, **hub_kwargs, **kwargs
             )
             _ = hub_kwargs.pop("code_revision", None)
-            if os.path.isdir(pretrained_model_name_or_path):
-                model_class.register_for_auto_class(cls.__name__)
-            else:
-                cls.register(config.__class__, model_class, exist_ok=True)
+            cls.register(config.__class__, model_class, exist_ok=True)
             return model_class.from_pretrained(
                 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
             )

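Context for the change above: the dropped os.path.isdir branch meant that a checkpoint loaded from a local directory only called register_for_auto_class and was never added to the auto-model mapping, which is why pipeline() could not resolve the dynamically loaded class afterwards. The now-unconditional cls.register(...) call is the same mechanism exposed publicly on the auto classes; a minimal sketch of that public API, with hypothetical class names that are not part of this commit:

from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel

# Hypothetical custom architecture, for illustration only.
class MyCustomConfig(PretrainedConfig):
    model_type = "my-custom"

class MyCustomModel(PreTrainedModel):
    config_class = MyCustomConfig

# Register the config type and map the config class to the model class; the patched
# code path above now performs the equivalent registration for remote-code classes.
AutoConfig.register("my-custom", MyCustomConfig)
AutoModelForCausalLM.register(MyCustomConfig, MyCustomModel, exist_ok=True)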

@@ -21,6 +21,7 @@ from collections import OrderedDict
 from pathlib import Path
 
 import pytest
+from huggingface_hub import Repository
 
 import transformers
 from transformers import BertConfig, GPT2Model, is_safetensors_available, is_torch_available
@@ -529,3 +530,12 @@ class AutoModelTest(unittest.TestCase):
         _MODEL_MAPPING_NAMES = OrderedDict([("bert", "GPT2Model")])
         _MODEL_MAPPING = _LazyAutoMapping(_CONFIG_MAPPING_NAMES, _MODEL_MAPPING_NAMES)
         self.assertEqual(_MODEL_MAPPING[BertConfig], GPT2Model)
+
+    def test_dynamic_saving_from_local_repo(self):
+        with tempfile.TemporaryDirectory() as tmp_dir, tempfile.TemporaryDirectory() as tmp_dir_out:
+            _ = Repository(local_dir=tmp_dir, clone_from="hf-internal-testing/tiny-random-custom-architecture")
+            model = AutoModelForCausalLM.from_pretrained(tmp_dir, trust_remote_code=True)
+            model.save_pretrained(tmp_dir_out)
+            _ = AutoModelForCausalLM.from_pretrained(tmp_dir_out, trust_remote_code=True)
+            self.assertTrue((Path(tmp_dir_out) / "modeling_fake_custom.py").is_file())
+            self.assertTrue((Path(tmp_dir_out) / "configuration_fake_custom.py").is_file())

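The new test above checks the save/re-load round trip: a model loaded from a local clone with trust_remote_code=True should still have its custom modeling and configuration files copied into the save_pretrained output so the result can be re-loaded. A rough stand-alone version of that round trip, substituting snapshot_download for the test's Repository clone (my substitution, not from this commit):

import os
import tempfile

from huggingface_hub import snapshot_download
from transformers import AutoModelForCausalLM

# Download the custom-code test repo to a local directory.
local_dir = snapshot_download("hf-internal-testing/tiny-random-custom-architecture")

with tempfile.TemporaryDirectory() as out_dir:
    model = AutoModelForCausalLM.from_pretrained(local_dir, trust_remote_code=True)
    model.save_pretrained(out_dir)
    # The custom code files should sit next to the saved weights and config.
    print(sorted(f for f in os.listdir(out_dir) if f.endswith(".py")))
    reloaded = AutoModelForCausalLM.from_pretrained(out_dir, trust_remote_code=True)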

@@ -22,7 +22,7 @@ from pathlib import Path
 
 import datasets
 import numpy as np
-from huggingface_hub import HfFolder, delete_repo
+from huggingface_hub import HfFolder, Repository, delete_repo
 from requests.exceptions import HTTPError
 
 from transformers import (
@@ -226,6 +226,14 @@ class CommonPipelineTest(unittest.TestCase):
         pipe.model = None
         self.assertIsNone(pipe.torch_dtype)
 
+    @require_torch
+    def test_auto_model_pipeline_registration_from_local_dir(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            _ = Repository(local_dir=tmp_dir, clone_from="hf-internal-testing/tiny-random-custom-architecture")
+
+            pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)
+
+            self.assertIsInstance(pipe, TextGenerationPipeline)  # Assert successful load
 
 @is_pipeline_test
 class PipelineScikitCompatTest(unittest.TestCase):
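Taken together with the auto-factory change, the pipeline test above covers the user-facing scenario from the commit title: building a pipeline directly from a local copy of a repo that ships its own model code. A minimal sketch of that flow outside the test harness, again using snapshot_download in place of the Repository clone (an assumption on my part):

from huggingface_hub import snapshot_download
from transformers import pipeline

# Download a repo whose model/config classes live in the repo itself.
local_dir = snapshot_download("hf-internal-testing/tiny-random-custom-architecture")

# With the fix, loading from the local path registers the custom class in the auto
# mapping, so the task pipeline can be constructed from it.
pipe = pipeline("text-generation", local_dir, trust_remote_code=True)
print(type(pipe).__name__)  # expected: TextGenerationPipeline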