Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/Instruction/Command-line-parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ App参数继承于[部署参数](#部署参数), [Web-UI参数](#Web-UI参数)
- to_ollama: 产生ollama所需的Modelfile文件。默认为False。
- 🔥to_mcore: HF格式权重转成Megatron格式。默认为False。
- to_hf: Megatron格式权重转成HF格式。默认为False。
- export_language_model_only: 只导出多模态模型中的语言模型部分,并写出文本模型配置。适用于只接受CausalLM格式权重名的文本推理后端。默认为False。
- mcore_model: mcore格式模型路径。默认为None。
- mcore_adapter: mcore格式模型的adapter路径,默认为None。
- thread_count: `--to_mcore true`时的模型切片数。默认为None,根据模型大小自动设置,使得最大分片小于10GB。
Expand Down
1 change: 1 addition & 0 deletions docs/source_en/Instruction/Command-line-parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,7 @@ Export Arguments include the [basic arguments](#base-arguments) and [merge argum
- to_ollama: Generate the Modelfile required by Ollama. Default is False.
- 🔥to_mcore: Convert weights from HF format to Megatron format. Default is False.
- to_hf: Convert weights from Megatron format to HF format. Default is False.
- export_language_model_only: Export only the language model part of a multimodal model and write a text model config. This is intended for text-only inference backends that expect CausalLM-style weight names. Default is False.
- mcore_model: Path to the mcore format model. Default is None.
- mcore_adapter: The adapter path for mcore format models, default is None.
- thread_count: The number of model slices when `--to_mcore true` is set. Defaults to None, and is automatically configured based on the model size, ensuring that the largest slice is less than 10GB.
Expand Down
12 changes: 12 additions & 0 deletions swift/arguments/export_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class ExportArguments(MergeArguments, BaseArguments):
to_ollama (bool): Whether to generate the `Modelfile` required by Ollama. Defaults to False.
to_mcore (bool): Whether to convert Hugging Face format weights to Megatron-Core format. Defaults to False.
to_hf (bool): Whether to convert Megatron-Core format weights to Hugging Face format. Defaults to False.
export_language_model_only (bool): Whether to export only the language model part of a multimodal model.
This is useful when a text-only inference backend expects CausalLM-style weight names. Defaults to False.
mcore_model (Optional[str]): The path to the Megatron-Core format model. Defaults to None.
mcore_adapter (Optional[str]): The adapter path for the Megatron-Core format model. Defaults to None.
thread_count (Optional[int]): The number of model shards when `to_mcore` is True. Defaults to None, which
Expand Down Expand Up @@ -66,6 +68,7 @@ class ExportArguments(MergeArguments, BaseArguments):
# megatron
to_mcore: bool = False
to_hf: bool = False
export_language_model_only: bool = False
mcore_model: Optional[str] = None
mcore_adapter: Optional[str] = None
thread_count: Optional[int] = None
Expand Down Expand Up @@ -101,6 +104,10 @@ def _init_output_dir(self):
suffix = 'ollama'
elif self.merge_lora:
suffix = 'merged'
if self.export_language_model_only:
suffix += '-language-model'
elif self.export_language_model_only:
suffix = 'language-model'
elif self.to_mcore:
suffix = 'mcore'
elif self.to_hf:
Expand All @@ -126,6 +133,11 @@ def __post_init__(self):
raise ValueError('Please specify `--quant_bits`.')
if self.quant_method in {'gptq', 'awq'} and self.torch_dtype is None:
self.torch_dtype = torch.float16
if self.export_language_model_only and (
self.quant_method or self.to_ollama or self.to_mcore or self.to_hf or self.to_cached_dataset
or self.to_peft_format):
raise ValueError(
'`export_language_model_only` only supports regular checkpoint export or merge-lora export.')
if self.to_mcore or self.to_hf:
if self.merge_lora:
self.merge_lora = False
Expand Down
132 changes: 127 additions & 5 deletions swift/model/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
import copy
import os
import shutil
import torch
import torch.nn.functional as F
from accelerate.utils import find_device
from collections import OrderedDict
from functools import wraps
from packaging import version
from peft import PeftModel
Expand All @@ -13,7 +15,7 @@
from transformers.utils import (is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_mps_available,
is_torch_npu_available, strtobool)
from types import MethodType
from typing import List, Optional, TypeVar, Union
from typing import Dict, List, Optional, TypeVar, Union

from swift.utils import (HfConfigFactory, Processor, deep_getattr, get_dist_setting, get_env_args, get_logger, is_mp,
to_device)
Expand Down Expand Up @@ -286,32 +288,152 @@ def forward(self, x):
_patch_conv3d()


def _get_language_model_prefixes(model: PreTrainedModel) -> List[str]:
model_arch = getattr(getattr(model, 'model_meta', None), 'model_arch', None)
prefixes = getattr(model_arch, 'language_model', None) or []
Comment on lines +292 to +293

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

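If model does not have a model_meta attribute (or if it is None), the inner getattr returns None. Note that the outer call getattr(None, 'model_arch', None) does not raise an AttributeError: three-argument getattr returns the supplied default when the attribute is missing, so the existing code already yields None and then an empty prefix list. Retrieving model_meta first before attempting to access model_arch is therefore optional, but it makes the intent more explicit: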

    model_meta = getattr(model, 'model_meta', None)
    model_arch = getattr(model_meta, 'model_arch', None) if model_meta is not None else None
    prefixes = getattr(model_arch, 'language_model', None) or []

if isinstance(prefixes, str):
prefixes = [prefixes]
return prefixes


def _get_language_model_target_prefix(source_prefix: str) -> str:
if source_prefix == 'language_model':
return 'model'
suffix = '.language_model'
if source_prefix.endswith(suffix):
return source_prefix[:-len(suffix)]
raise ValueError(
f'Cannot export language-model-only checkpoint for language_model prefix `{source_prefix}`. '
'Only `language_model` and `*.language_model` prefixes are currently supported.')


def get_language_model_state_dict(model: PreTrainedModel) -> Dict[str, torch.Tensor]:
    """Collect the language-model weights of ``model`` under text-model key names.

    The first language-model prefix is treated as the primary one: keys below
    it are re-rooted at the prefix returned by
    ``_get_language_model_target_prefix``. Keys matching any further prefix are
    copied with their names unchanged. All other keys are dropped.

    Args:
        model: Model whose ``state_dict()`` is filtered and renamed.

    Returns:
        An ordered mapping from exported key name to tensor, in the iteration
        order of ``model.state_dict()``.

    Raises:
        ValueError: If no language-model prefixes are available, if two source
            keys map to the same exported key, or if no key matched at all.
    """
    prefixes = _get_language_model_prefixes(model)
    if not prefixes:
        raise ValueError('`export_language_model_only` requires a multimodal model with language_model prefixes.')

    primary = prefixes[0].rstrip('.')
    target_prefix = _get_language_model_target_prefix(primary)
    primary_dot = f'{primary}.'
    # Secondary prefixes are kept verbatim; normalise them once up front.
    extras = [extra.rstrip('.') for extra in prefixes[1:]]

    exported = OrderedDict()
    for name, tensor in model.state_dict().items():
        if name.startswith(primary_dot):
            renamed = f'{target_prefix}.{name[len(primary_dot):]}'
        elif name == primary:
            renamed = target_prefix
        elif any(name == extra or name.startswith(f'{extra}.') for extra in extras):
            renamed = name
        else:
            # Not part of the language model.
            continue
        if renamed in exported:
            raise ValueError(f'Duplicate key `{renamed}` while exporting language-model-only checkpoint.')
        exported[renamed] = tensor

    if not exported:
        raise ValueError(f'No language model weights found with prefixes: {prefixes}.')
    return exported


def _infer_language_model_architectures(model: PreTrainedModel) -> Optional[List[str]]:
candidates = []
config = getattr(model, 'config', None)
if config is not None:
architectures = getattr(config, 'architectures', None) or []
candidates.extend(architectures)
candidates.append(model.__class__.__name__)

for arch in candidates:
if arch.endswith('ForCausalLM'):
return [arch]
if arch.endswith('ForConditionalGeneration'):
return [arch[:-len('ForConditionalGeneration')] + 'ForCausalLM']
return None


def get_language_model_config(model: PreTrainedModel) -> PretrainedConfig:
    """Build the config to save alongside a language-model-only checkpoint.

    The text config is obtained via ``HfConfigFactory.get_text_config`` and
    deep-copied before modification, so ``model.config`` is left untouched.
    When an architecture name can be inferred for the text model, it replaces
    ``architectures`` on the copy.

    Args:
        model: Model whose config supplies the text config.

    Returns:
        A copy of the text config.

    Raises:
        ValueError: If the text config returned for ``model.config`` is
            ``model.config`` itself, i.e. there is no separate text config.
    """
    source_text_config = HfConfigFactory.get_text_config(model.config)
    if source_text_config is model.config:
        raise ValueError('`export_language_model_only` requires a multimodal config with a text config.')

    exported_config = copy.deepcopy(source_text_config)
    inferred = _infer_language_model_architectures(model)
    if inferred is None:
        # Nothing could be inferred: keep whatever the text config declared.
        return exported_config
    exported_config.architectures = inferred
    return exported_config


def save_language_model_checkpoint(model: PreTrainedModel,
                                   output_dir: str,
                                   *,
                                   safe_serialization: bool = True,
                                   max_shard_size: Union[int, str] = '5GB') -> None:
    """Write only the language-model part of ``model`` to ``output_dir``.

    Saves, in order: the text config, the model's generation config (if it has
    one), and the renamed language-model weights via
    ``huggingface_hub.save_torch_state_dict``.

    Args:
        model: Multimodal model to export from.
        output_dir: Destination directory; created if missing.
        safe_serialization: Forwarded to ``save_torch_state_dict``. When true,
            ``{'format': 'pt'}`` is passed as metadata.
        max_shard_size: Forwarded to ``save_torch_state_dict``.

    Raises:
        ImportError: If ``huggingface_hub.save_torch_state_dict`` is unavailable.
        ValueError: Propagated from ``get_language_model_state_dict`` or
            ``get_language_model_config`` when the model cannot be exported.
    """
    try:
        from huggingface_hub import save_torch_state_dict
    except ImportError as e:
        raise ImportError('`export_language_model_only` requires `huggingface_hub.save_torch_state_dict`.') from e

    # Run every step that can reject the model *before* touching the filesystem.
    # Otherwise a failed export leaves an empty directory behind, and callers that
    # treat an existing output directory as a finished export would skip the retry.
    state_dict = get_language_model_state_dict(model)
    text_config = get_language_model_config(model)

    os.makedirs(output_dir, exist_ok=True)
    text_config.save_pretrained(output_dir)
    generation_config = getattr(model, 'generation_config', None)
    if generation_config is not None:
        generation_config.save_pretrained(output_dir)
    save_torch_state_dict(
        state_dict,
        output_dir,
        max_shard_size=max_shard_size,
        safe_serialization=safe_serialization,
        metadata={'format': 'pt'} if safe_serialization else None)


def save_processor_checkpoint(processor: Processor, output_dir: str, *, language_model_only: bool = False) -> None:
    """Save the processor, or only its tokenizer for a language-model-only export.

    Args:
        processor: Object exposing ``save_pretrained`` and, optionally, a
            ``tokenizer`` attribute.
        output_dir: Directory passed to ``save_pretrained``.
        language_model_only: When true and ``processor.tokenizer`` is set, only
            that tokenizer is saved. In every other case the processor itself
            is saved.
    """
    # The tokenizer is only looked up for language-model-only exports.
    tokenizer = getattr(processor, 'tokenizer', None) if language_model_only else None
    if tokenizer is None:
        processor.save_pretrained(output_dir)
    else:
        tokenizer.save_pretrained(output_dir)


def save_checkpoint(model: Optional[PreTrainedModel],
processor: Processor,
output_dir: str,
*,
safe_serialization: bool = True,
max_shard_size: Union[int, str] = '5GB',
model_dirs: List[str] = None,
additional_saved_files: Optional[List[str]] = None) -> None:
additional_saved_files: Optional[List[str]] = None,
language_model_only: bool = False) -> None:
if model is not None:
if model.__class__.__name__ != 'SentenceTransformer':
if language_model_only:
save_language_model_checkpoint(
model, output_dir, safe_serialization=safe_serialization, max_shard_size=max_shard_size)
elif model.__class__.__name__ != 'SentenceTransformer':
model.save_pretrained(output_dir, safe_serialization=safe_serialization, max_shard_size=max_shard_size)
else:
model.save_pretrained(output_dir, safe_serialization=safe_serialization)
# copy sentencetransformers files
from swift.utils import copy_files_by_pattern
copy_files_by_pattern(model.model_dir, output_dir, '*.py')
copy_files_by_pattern(model.model_dir, output_dir, '*.json')
processor.save_pretrained(output_dir)
save_processor_checkpoint(processor, output_dir, language_model_only=language_model_only)

if model_dirs is None:
model_dirs = []
else:
model_dirs = model_dirs.copy()
if model and model.model_dir and model.model_dir not in model_dirs:
model_dirs.append(model.model_dir)
for src_file in (additional_saved_files or []) + ['preprocessor_config.json', 'args.json']:
src_files = (additional_saved_files or []) + ['args.json']
if not language_model_only:
src_files.append('preprocessor_config.json')
for src_file in src_files:
tgt_path = os.path.join(output_dir, src_file)
if os.path.exists(tgt_path) and src_file == 'args.json':
continue
Expand Down
4 changes: 3 additions & 1 deletion swift/pipelines/export/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from swift.tuners import swift_to_peft_format
from swift.utils import get_logger
from .cached_dataset import export_cached_dataset
from .merge_lora import merge_lora
from .merge_lora import export_language_model, merge_lora
from .ollama import export_to_ollama
from .quant import quantize_model

Expand All @@ -27,6 +27,8 @@ def run(self):
args.output_dir = None
merge_lora(args)
args.output_dir = output_dir # recover
if args.export_language_model_only and not args.merge_lora:
export_language_model(args)
if args.quant_method:
quantize_model(args)
elif args.to_ollama:
Expand Down
32 changes: 31 additions & 1 deletion swift/pipelines/export/merge_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,40 @@ def merge_lora(args: ExportArguments, device_map=None, replace_if_exists=False)
safe_serialization=args.safe_serialization,
model_dirs=args.adapters,
max_shard_size=args.max_shard_size,
additional_saved_files=model.model_meta.additional_saved_files)
additional_saved_files=model.model_meta.additional_saved_files,
language_model_only=args.export_language_model_only)
logger.info(f'Successfully merged LoRA and saved in `{output_dir}`.')
args.device_map = origin_device_map

args.model = output_dir
args.model_dir = output_dir
args.adapters = []


def export_language_model(args: ExportArguments, device_map=None, replace_if_exists=False) -> None:
    """Export the language-model part of the configured model to ``args.output_dir``.

    If the output directory already exists and ``replace_if_exists`` is false,
    nothing is saved. Otherwise the model is loaded with
    ``prepare_model_template`` and written with
    ``save_checkpoint(..., language_model_only=True)``. In both cases ``args``
    is then re-pointed at the output directory (``model``, ``model_dir``) and
    ``adapters`` is cleared.

    Args:
        args: Export arguments; ``device_map`` is overridden temporarily and
            ``model``, ``model_dir`` and ``adapters`` are overwritten.
        device_map: Optional device map to use while loading; falls back to
            ``args.device_map`` when falsy.
        replace_if_exists: Export even if ``args.output_dir`` already exists.
    """
    if replace_if_exists:
        logger.info(f'replace_if_exists: {replace_if_exists}')
    output_dir = args.output_dir
    if os.path.exists(output_dir) and not replace_if_exists:
        logger.info(f'The language model export already exists in {output_dir}, skipping the saving process.')
    else:
        origin_device_map = args.device_map
        args.device_map = device_map or args.device_map
        logger.info(f'export_device_map: {device_map}')
        try:
            model, template = prepare_model_template(args)
            logger.info('Saving language model weights...')
            save_checkpoint(
                model,
                template.processor,
                output_dir,
                safe_serialization=args.safe_serialization,
                model_dirs=[args.model_dir],
                max_shard_size=args.max_shard_size,
                additional_saved_files=model.model_meta.additional_saved_files,
                language_model_only=True)
            logger.info(f'Successfully exported language model weights in `{output_dir}`.')
        finally:
            # The device-map override is temporary: put the caller's value back
            # even when loading or saving raises.
            args.device_map = origin_device_map

    args.model = output_dir
    args.model_dir = output_dir
    args.adapters = []
Loading