diff --git a/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py b/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py index b0751e55430..cda6218f13c 100644 --- a/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py +++ b/python/llm/src/bigdl/llm/vllm/model_executor/models/bigdl_llama.py @@ -206,7 +206,6 @@ def forward( position_ids = torch.tensor(decoding_position_ids).long().unsqueeze(-1) kwargs = { "input_ids": bigdl_input_ids, - # gc(co): we rely on underlying model to generate position_ids "position_ids": position_ids, "attention_mask": attention_mask, "past_key_values": bigdl_kv_cache,