diff --git a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py index 855ab6381ee..16badf48b2f 100755 --- a/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py +++ b/src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py @@ -15,6 +15,7 @@ import argparse import os +from pathlib import Path import transformers @@ -69,6 +70,15 @@ def convert_slow_checkpoint_to_fast(tokenizer_name, checkpoint_name, dump_path, if "/" in checkpoint: checkpoint_directory, checkpoint_prefix_name = checkpoint.split("/") dump_path_full = os.path.join(dump_path, checkpoint_directory) + + # Security check + try: + Path(dump_path_full).resolve().relative_to(Path(dump_path).resolve()) + except ValueError: + raise ValueError( + f"Invalid checkpoint path: '{checkpoint}' attempts to escape `dump_path`: {dump_path}" + ) + elif add_prefix: checkpoint_prefix_name = checkpoint dump_path_full = dump_path