diff --git a/README.md b/README.md index df9a254f52..799c50194b 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,8 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog +[25/01/15] We supported fine-tuning the **[InternLM3](https://huggingface.co/internlm)** models. Thank [@hhaAndroid](https://github.com/hhaAndroid)'s PR. + [25/01/10] We supported fine-tuning the **[Phi-4](https://huggingface.co/microsoft/phi-4)** model. @@ -201,6 +202,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [InternLM3](https://huggingface.co/internlm) | 8B | intern3 | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | | [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 | diff --git a/README_zh.md b/README_zh.md index c9f28c49b7..2f4b39973e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -89,6 +89,8 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 ## 更新日志 +[25/01/15] 我们支持了 **[InternLM3](https://huggingface.co/internlm)** 模型的微调。感谢 [@hhaAndroid](https://github.com/hhaAndroid) 的 PR。 + [25/01/10] 我们支持了 **[Phi-4](https://huggingface.co/microsoft/phi-4)** 模型的微调。 [24/12/21] 我们支持了使用 **[SwanLab](https://github.com/SwanHubX/SwanLab)** 跟踪与可视化实验。详细用法请参考 [此部分](#使用-swanlab-面板)。 @@ -202,6 +204,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 | | [Index](https://huggingface.co/IndexTeam) | 1.9B | index | | [InternLM2/InternLM2.5](https://huggingface.co/internlm) | 7B/20B | intern2 | +| 
[InternLM3](https://huggingface.co/internlm) | 8B | intern3 | | [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | | [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | | [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 65c1795622..07be3cecf2 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -732,6 +732,15 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args: stop_words=["<|im_end|>"], ) +_register_template( name="intern3", format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]), format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|im_end|>"], ) + _register_template( name="llama2", diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 7ef0d9f469..2cf002059f 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -817,6 +817,16 @@ def register_model_group( template="intern2", ) +register_model_group( models={ "InternLM3-8B-Chat": { DownloadSource.DEFAULT: "internlm/internlm3-8b-instruct", DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm3-8b-instruct", }, }, template="intern3", ) + register_model_group( models={ diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 2a7e5ddf8b..4dc7e1b5b8 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -24,6 +24,7 @@ from ..extras import logging from ..extras.misc import infer_optim_dtype +from ..extras.packages import is_transformers_version_greater_than from .model_utils.attention import 
configure_attn_implementation, print_attn_implementation from .model_utils.checkpointing import prepare_model_for_training from .model_utils.embedding import resize_embedding_layer @@ -117,6 +118,9 @@ def patch_config( if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []): raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf") + if getattr(config, "model_type", None) == "internlm3" and not is_transformers_version_greater_than("4.47.1"): + raise RuntimeError("InternLM3 model requires transformers >= 4.47.1, please upgrade it.") + # deepspeed zero3 is not compatible with low_cpu_mem_usage init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())