Compare commits

...

2 Commits

SHA1        Message                                    Date
70532bfb6d  oops                                       2024-04-12 10:01:11 +02:00
500c97fe63  Don't use deprecated Repository anymore    2024-04-12 09:33:31 +02:00
6 changed files with 90 additions and 82 deletions
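Both commits migrate the example scripts from the deprecated git-based Repository workflow to the HTTP-based HfApi client: the repository is created once with create_repo(..., exist_ok=True) and checkpoints are pushed with upload_folder() instead of cloning the output directory and calling repo.push_to_hub(). A minimal sketch of the pattern applied throughout (token, repo name, and folder below are placeholders, not values from the diff):

from huggingface_hub import HfApi

# Old, deprecated pattern (for reference):
#   repo = Repository(output_dir, clone_from=repo_name, token=token)
#   repo.push_to_hub(commit_message="...", blocking=False, auto_lfs_prune=True)

# New pattern: plain HTTP uploads, no local git clone needed
api = HfApi(token="hf_xxx")  # placeholder token
repo_id = api.create_repo("my-model", exist_ok=True).repo_id
api.upload_folder(
    repo_id=repo_id,
    folder_path="outputs",  # placeholder output directory
    commit_message="Training in progress",
    run_as_future=True,  # non-blocking, replaces blocking=False
)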

View File

@@ -27,12 +27,11 @@ from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import DatasetDict, load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch import nn
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import AutoModel, AutoTokenizer, SchedulerType, default_data_collator, get_scheduler
-from transformers.utils import get_full_repo_name
 from peft import LoraConfig, TaskType, get_peft_model
@@ -236,12 +235,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
+            api = HfApi(token=args.hub_token)
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -487,7 +487,12 @@ def main():
                     if epoch < args.num_train_epochs - 1
                     else "End of training"
                 )
-                repo.push_to_hub(commit_message=commit_message, blocking=False, auto_lfs_prune=True)
+                api.upload_folder(
+                    repo_id=repo_id,
+                    folder_path=args.output_dir,
+                    commit_message=commit_message,
+                    run_as_future=True,
+                )
     accelerator.wait_for_everyone()
     accelerator.end_training()

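A note on the asynchronous push above: with the old Repository API, blocking=False made push_to_hub() return immediately; the HfApi equivalent is run_as_future=True, which runs the upload in the background and returns a concurrent.futures.Future. A hedged sketch (token and repo name are placeholders):

from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # placeholder token
future = api.upload_folder(
    repo_id="user/my-model",  # placeholder repo
    folder_path="outputs",
    commit_message="Training in progress epoch 0",
    run_as_future=True,  # returns a concurrent.futures.Future instead of blocking
)
# ... training continues while the commit uploads in the background ...
future.result()  # wait for the background commit only if/when needed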
View File

@@ -26,7 +26,7 @@ from accelerate.logging import get_logger
 from datasets import Audio, DatasetDict, IterableDatasetDict, interleave_datasets, load_dataset
 # hf imports
-from huggingface_hub import Repository
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm import tqdm
 from transformers import (
@@ -38,7 +38,6 @@ from transformers import (
     set_seed,
 )
 from transformers.models.whisper.english_normalizer import BasicTextNormalizer
-from transformers.utils import get_full_repo_name
 # peft imports
 from peft import AdaLoraConfig, LoraConfig, PeftModel, get_peft_model
@@ -450,11 +449,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)
+            api = HfApi(token=args.hub_token)
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -739,8 +740,11 @@ def main():
         if accelerator.is_main_process:
             processor.tokenizer.save_pretrained(args.output_dir)
-            repo.push_to_hub(
-                commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message=f"Training in progress epoch {epoch}",
+                run_as_future=True,
             )
     if args.load_best_model:
@@ -760,7 +764,11 @@ def main():
     if accelerator.is_main_process:
         processor.tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+            )
         with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
             eval_metrics.pop("eval_samples")

View File

@@ -28,7 +28,7 @@ from accelerate import Accelerator, DistributedType
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
 from datasets import load_dataset
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformers import (
@@ -333,14 +333,13 @@ def main():
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            # Retrieve of infer repo_name
+            api = HfApi(token=args.hub_token)
+            # Create repo (repo_name from args or inferred)
             repo_name = args.hub_model_id
             if repo_name is None:
                 repo_name = Path(args.output_dir).absolute().name
-            # Create repo and retrieve repo_id
-            repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id
-            # Clone repo locally
-            repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token)
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -780,8 +779,11 @@ def main():
         )
         if accelerator.is_main_process:
             tokenizer.save_pretrained(args.output_dir)
-            repo.push_to_hub(
-                commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message=f"Training in progress epoch {epoch}",
+                run_as_future=True,
             )
         if args.checkpointing_steps == "epoch":
@@ -802,7 +804,11 @@ def main():
     if accelerator.is_main_process:
         tokenizer.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+            )
 PATTERN_NUMBER = re.compile(r"-?\d+\.?\d*")

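In the file above, the Hub token also moves from the individual calls (create_repo(..., token=args.hub_token)) to the HfApi constructor, so later calls such as upload_folder() reuse it automatically; the repo_id returned by create_repo() already carries the namespace. An illustrative sketch (token and names are placeholders):

from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # token stored once on the client
repo_url = api.create_repo("my-model", exist_ok=True)  # no per-call token needed
repo_id = repo_url.repo_id  # fully qualified, e.g. "username/my-model"
api.upload_folder(repo_id=repo_id, folder_path="outputs", commit_message="End of training")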
View File

@@ -9,7 +9,6 @@ import threading
 import warnings
 from contextlib import nullcontext
 from pathlib import Path
-from typing import Optional
 import datasets
 import diffusers
@@ -32,7 +31,7 @@ from diffusers import (
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -576,16 +575,6 @@ class PromptDataset(Dataset):
         return example
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
@@ -678,11 +667,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1086,7 +1077,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
     accelerator.end_training()

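The DreamBooth script above (and the two below) also drops its local get_full_repo_name() helper: with HfApi, passing a bare model name to create_repo() creates the repository under the namespace of the authenticated token, and the returned repo_id already includes that namespace, so the whoami()-based lookup is no longer needed. A rough sketch of the replacement (token and name are placeholders):

from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # placeholder token
# Previously: username = whoami(token)["name"]; repo_name = f"{username}/{model_id}"
# Now the namespace is resolved server-side from the token:
repo_id = api.create_repo("dreambooth-model", exist_ok=True).repo_id
print(repo_id)  # e.g. "username/dreambooth-model"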
View File

@@ -9,7 +9,6 @@ import threading
 import warnings
 from contextlib import nullcontext
 from pathlib import Path
-from typing import Optional
 import datasets
 import diffusers
@@ -32,7 +31,7 @@ from diffusers import (
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -586,16 +585,6 @@ class PromptDataset(Dataset):
         return example
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
@@ -688,11 +677,13 @@ def main(args):
     # Handle the repository creation
     if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1094,7 +1085,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
     accelerator.end_training()

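All of these scripts still write a .gitignore with a step_* entry (kept as context in the hunks above); with the old git-based Repository, that kept intermediate step_* checkpoints out of the pushed repo. If the same exclusion is wanted with the HTTP workflow, upload_folder() also accepts explicit allow_patterns / ignore_patterns arguments; a possible variant (token, repo, and pattern are assumptions, not part of this diff):

from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # placeholder token
api.upload_folder(
    repo_id="user/my-model",  # placeholder repo
    folder_path="outputs",
    commit_message="End of training",
    ignore_patterns=["step_*"],  # explicitly skip intermediate step_* checkpoints
)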
View File

@@ -8,7 +8,7 @@ import os
 import threading
 import warnings
 from pathlib import Path
-from typing import Optional, Union
+from typing import Union
 import datasets
 import diffusers
@@ -31,7 +31,7 @@ from diffusers import (
 from diffusers.optimization import get_scheduler
 from diffusers.utils import check_min_version
 from diffusers.utils.import_utils import is_xformers_available
-from huggingface_hub import HfFolder, Repository, whoami
+from huggingface_hub import HfApi
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -749,16 +749,6 @@ class PromptDataset(Dataset):
         return example
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
@@ -851,11 +841,13 @@ def main(args):
     # Handle the repository creation
    if accelerator.is_main_process:
         if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo = Repository(args.output_dir, clone_from=repo_name)  # noqa: F841
+            api = HfApi(token=args.hub_token)
+            # Create repo (repo_name from args or inferred)
+            repo_name = args.hub_model_id
+            if repo_name is None:
+                repo_name = Path(args.output_dir).absolute().name
+            repo_id = api.create_repo(repo_name, exist_ok=True).repo_id
             with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
                 if "step_*" not in gitignore:
@@ -1252,7 +1244,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            api.upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                run_as_future=True,
+            )
     accelerator.end_training()