@ -342,6 +342,7 @@ max_length: 1024
|
||||
| reserved_label_len | 分词后为标签（label）序列预留的最小长度。 | int | 1 | 可选 |
|
||||
| ignore_pad_token_for_loss | 计算loss时是否忽略填充（pad）token对应的标签。 | bool | True | 可选 |
|
||||
| packing | 训练时是否对数据进行packing。 | bool | False | 可选 |
|
||||
| remove_unused_columns | 是否移除数据集中未使用的列。 | bool | True | 可选 |
|
||||
|
||||
## 训练参数配置
|
||||
|
||||
|
@ -10,6 +10,7 @@ deepspeed: examples/deepspeed/ds_z2_config.json
|
||||
# dataset
|
||||
dataset: mllm
|
||||
cutoff_len: 1024
|
||||
remove_unused_columns: False
|
||||
|
||||
# output
|
||||
output_dir: saves/qwen2_vl_7b_full
|
||||
|
@ -292,6 +292,12 @@ def _add_data_args(parser):
|
||||
group.add_argument("--packing", type=str2bool, default=False, help="Enable sequences packing in training.")
|
||||
group.add_argument("--default_system", type=str, default=None, help="The default system of template to use.")
|
||||
group.add_argument("--tool_format", type=str, default=None, help="The tool format of template to use.")
|
||||
group.add_argument(
|
||||
"--remove_unused_columns",
|
||||
type=str2bool,
|
||||
default=True,
|
||||
help="Whether or not to remove unused columns.",
|
||||
)
|
||||
|
||||
return parser
|
||||
|
||||
|
@ -16,6 +16,15 @@
|
||||
},
|
||||
"template": "qwen"
|
||||
},
|
||||
"qwen2.5_vl": {
|
||||
"models": {
|
||||
"Qwen2.5-VL-7B-Instruct": {
|
||||
"modelers": "PyTorch-NPU/Qwen2.5-VL-7B-Instruct",
|
||||
"huggingface": "Qwen/Qwen2.5-VL-7B-Instruct"
|
||||
}
|
||||
},
|
||||
"template": "qwen2_vl"
|
||||
},
|
||||
|
||||
"qwen2": {
|
||||
"models": {
|
||||
|
@ -89,11 +89,10 @@
|
||||
},
|
||||
{
|
||||
"name": "qwen2_vl",
|
||||
"system_template": "<|im_start|>system\n{{content}}<|im_end|>\n",
|
||||
"user_template": "<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n",
|
||||
"assistant_template": "{{content}}<|im_end|>\n",
|
||||
"system_template": "<|im_start|>system\n{content}<|im_end|>\n",
|
||||
"user_template": "<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n",
|
||||
"assistant_template": "{content}<|im_end|>\n",
|
||||
"default_system": "You are a helpful assistant.",
|
||||
"separator_template": "<|im_end|>",
|
||||
"mm_plugin": "{'plugin_name':'qwen2_vl','image_token':'<|image_pad|>','video_token':'<|video_pad|>'}"
|
||||
}
|
||||
]
|
@ -36,7 +36,7 @@ from transformers import (
|
||||
)
|
||||
from transformers.dynamic_module_utils import get_relative_imports
|
||||
from transformers.integrations import is_deepspeed_zero3_enabled
|
||||
from openmind.utils import logging
|
||||
from openmind.utils import logging, is_torch_npu_available
|
||||
from openmind.integrations.transformers.npu_fused_ops.sdk import SUPPORTED_FUSED_MODELS, map_fused_kernel_to_model
|
||||
from openmind.flow.arguments import get_args
|
||||
from openmind.flow.model.model_registry import SUPPORTED_MODELS
|
||||
@ -256,6 +256,12 @@ def patch_config(config):
|
||||
delattr(config, "use_npu_rms_norm")
|
||||
|
||||
|
||||
def disable_internal_format():
    """Turn off NPU internal (private) memory formats when running on NPU.

    Works around an illegal-format error raised by the Conv3DBackpropFilter
    operator on NPU devices. No-op on non-NPU environments.
    """
    if not is_torch_npu_available():
        return
    # NOTE(review): forcing public formats avoids the Conv3DBackpropFilter
    # illegal-format failure observed on npu.
    torch.npu.config.allow_internal_format = False
|
||||
|
||||
|
||||
def get_model():
|
||||
r"""
|
||||
Loads pretrained model.
|
||||
@ -316,6 +322,7 @@ def get_model():
|
||||
|
||||
if type(config) in AutoModelForVision2Seq._model_mapping.keys(): # assume built-in models
|
||||
load_class = AutoModelForVision2Seq
|
||||
disable_internal_format()
|
||||
|
||||
else:
|
||||
load_class = AutoModelForCausalLM
|
||||
|
@ -37,6 +37,7 @@ def run_sft(
|
||||
data_collator = SFTDataCollatorWith4DAttentionMask(
|
||||
template=template,
|
||||
tokenizer=tokenizer,
|
||||
model=model,
|
||||
processor=processor,
|
||||
padding="max_length" if args.max_length else True,
|
||||
pad_to_multiple_of=8 if args.do_train else None,
|
||||
|
Reference in New Issue
Block a user