Update LoRA.py #4184

Open · wants to merge 1 commit into base: main
60 changes: 60 additions & 0 deletions modules/LoRA.py
@@ -13,6 +13,8 @@ def add_lora_to_model(lora_names):
        add_lora_autogptq(lora_names)
    elif shared.model.__class__.__name__ in ['ExllamaModel', 'ExllamaHF'] or shared.args.loader == 'ExLlama':
        add_lora_exllama(lora_names)
    elif shared.model.__class__.__name__ in ['LlamaCppModel']:
        add_lora_llamacpp(lora_names)
    else:
        add_lora_transformers(lora_names)

@@ -96,6 +98,64 @@ def add_lora_autogptq(lora_names):
    return


def add_lora_llamacpp(lora_names):
    import llama_cpp

    if len(lora_names) == 0:
        shared.lora_names = []
        return
    else:
        if len(lora_names) > 1:
            logger.warning('Llama can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')

        lora_path = get_lora_path(lora_names[0])
        lora_adapter_path = str(lora_path / "ggml-adapter-model.bin")

        logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join([lora_names[0]])))

        if shared.args.tensor_split is None or shared.args.tensor_split.strip() == '':
            tensor_split_list = None
        else:
            tensor_split_list = [float(x) for x in shared.args.tensor_split.strip().split(",")]

        params = {
            'model_path': str(shared.model.model.model_path),
            'lora_path': str(lora_adapter_path),
            'n_ctx': shared.args.n_ctx,
            'seed': int(shared.args.llama_cpp_seed),
            'n_threads': shared.args.threads or None,
            'n_threads_batch': shared.args.threads_batch or None,
            'n_batch': shared.args.n_batch,
            'use_mmap': not shared.args.no_mmap,
            'use_mlock': shared.args.mlock,
            'mul_mat_q': shared.args.mul_mat_q,
            'numa': shared.args.numa,
            'n_gpu_layers': shared.args.n_gpu_layers,
            'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
            'tensor_split': tensor_split_list,
            'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
        }

        shared.model.model = llama_cpp.Llama(**params)

        # shared.model.model.lora_path = lora_adapter_path
        # if llama_cpp.llama_model_apply_lora_from_file(
        #     shared.model.model.model,
        #     shared.model.model.lora_path.encode("utf-8"),
        #     scale=1,
        #     path_base_model=shared.model.model.lora_base.encode("utf-8") if shared.model.model.lora_base is not None else llama_cpp.c_char_p(0),
        #     n_threads=shared.model.model.n_threads,
        # ):
        #     raise RuntimeError(
        #         f"Failed to apply LoRA from lora path: {shared.model.lora_path} to base path: {shared.model.lora_base}"
        #     )

        shared.lora_names = [lora_names[0]]

        logger.info(f"Successfully applied LoRA {lora_adapter_path} to the model.")

        return
Comment on lines +104 to +157
@Jamim commented on Dec 5, 2023

Hello @srossitto79,
I have a few suggestions here:

  • you can just check lora_names with not
  • since you already have a return there, you don't need the else block; you can remove the else and decrease the indentation accordingly
  • using an additional variable for the lora_name might be reasonable
  • ', '.join([lora_names[0]]) makes no sense because the result will be equal to lora_names[0], since there is only one element in the list
  • the return at the end is also redundant
Suggested change
    if not lora_names:
        shared.lora_names = []
        return

    if len(lora_names) > 1:
        logger.warning('Llama can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')

    lora_name = lora_names[0]
    lora_path = get_lora_path(lora_name)
    lora_adapter_path = str(lora_path / "ggml-adapter-model.bin")

    logger.info(f"Applying the following LoRAs to {shared.model_name}: {lora_name}")

    if not (shared.args.tensor_split and shared.args.tensor_split.strip()):
        tensor_split_list = None
    else:
        tensor_split_list = [float(x) for x in shared.args.tensor_split.strip().split(",")]

    params = {
        'model_path': str(shared.model.model.model_path),
        'lora_path': str(lora_adapter_path),
        'n_ctx': shared.args.n_ctx,
        'seed': int(shared.args.llama_cpp_seed),
        'n_threads': shared.args.threads or None,
        'n_threads_batch': shared.args.threads_batch or None,
        'n_batch': shared.args.n_batch,
        'use_mmap': not shared.args.no_mmap,
        'use_mlock': shared.args.mlock,
        'mul_mat_q': shared.args.mul_mat_q,
        'numa': shared.args.numa,
        'n_gpu_layers': shared.args.n_gpu_layers,
        'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
        'tensor_split': tensor_split_list,
        'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
    }

    shared.model.model = llama_cpp.Llama(**params)

    # shared.model.model.lora_path = lora_adapter_path
    # if llama_cpp.llama_model_apply_lora_from_file(
    #     shared.model.model.model,
    #     shared.model.model.lora_path.encode("utf-8"),
    #     scale=1,
    #     path_base_model=shared.model.model.lora_base.encode("utf-8") if shared.model.model.lora_base is not None else llama_cpp.c_char_p(0),
    #     n_threads=shared.model.model.n_threads,
    # ):
    #     raise RuntimeError(
    #         f"Failed to apply LoRA from lora path: {shared.model.lora_path} to base path: {shared.model.lora_base}"
    #     )

    shared.lora_names = [lora_name]
    logger.info(f"Successfully applied LoRA {lora_adapter_path} to the model.")


def add_lora_transformers(lora_names):
    prior_set = set(shared.lora_names)
    added_set = set(lora_names) - prior_set