diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 2ce84e8602..7b3676ddc9 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -24,6 +24,7 @@ from ..extras import logging from ..extras.misc import infer_optim_dtype +from ..extras.packages import is_transformers_version_greater_than from .model_utils.attention import configure_attn_implementation, print_attn_implementation from .model_utils.checkpointing import prepare_model_for_training from .model_utils.embedding import resize_embedding_layer @@ -113,6 +114,9 @@ def patch_config( if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []): raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf") + if getattr(config, "model_type", None) == "internlm3" and not is_transformers_version_greater_than("4.47.1"): + raise RuntimeError("InternLM3 model requires transformers >= 4.47.1, please upgrade it.") + # deepspeed zero3 is not compatible with low_cpu_mem_usage init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())