diff --git a/pipeline/serve/model_worker.py b/pipeline/serve/model_worker.py
index eab33055..70beedeb 100644
--- a/pipeline/serve/model_worker.py
+++ b/pipeline/serve/model_worker.py
@@ -183,12 +183,8 @@ def generate_stream(self, params):
         if images is not None:
             assert type(images) is list
             if len(images) > 0:
-                if type(images[0]) is list:  # currently support single video only
-                    images = images[-1]  # reserve the last video
-                    # Split the string from the right side using rsplit()
-                    split_prompt = prompt.rsplit(DEFAULT_IMAGE_TOKEN, -1)
-                    # Join the string back together, leaving out all occurrences of DEFAULT_IMAGE_TOKEN except the last one, reserve the last DEFAULT_IMAGE_TOKEN
-                    prompt = DEFAULT_IMAGE_TOKEN.join(split_prompt[:-1]) + split_prompt[-1]
+                if type(images[0]) is list:  # current support single video
+                    images = images[-1]
                     is_video = True
                 else:
                     is_video = False
@@ -208,6 +204,7 @@ def generate_stream(self, params):
             vision_x = None
 
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+        logger.info(f"Input prompt: {prompt}")
         inputs = tokenizer(
             [prompt],
             return_tensors="pt",
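
For context, here is a minimal standalone sketch of the rsplit/join prompt manipulation deleted in the first hunk, so the behavior being removed can be inspected in isolation. `DEFAULT_IMAGE_TOKEN` is assumed to be the string `"<image>"` purely for illustration, and the function name is hypothetical; in the repository the token comes from the project's own constants. Note that, as written, the expression removes only the rightmost occurrence of the token and keeps the earlier ones.

```python
# Sketch of the removed rsplit/join manipulation (not the repository code).
# DEFAULT_IMAGE_TOKEN is assumed to be "<image>" here for illustration only.
DEFAULT_IMAGE_TOKEN = "<image>"


def drop_rightmost_image_token(prompt: str) -> str:
    """Remove the rightmost occurrence of DEFAULT_IMAGE_TOKEN from prompt."""
    # Split on every occurrence of the token (maxsplit=-1 means no limit).
    split_prompt = prompt.rsplit(DEFAULT_IMAGE_TOKEN, -1)
    # Re-joining all but the final segment reinserts the token between the
    # earlier segments, so exactly one occurrence (the rightmost) is dropped.
    return DEFAULT_IMAGE_TOKEN.join(split_prompt[:-1]) + split_prompt[-1]


if __name__ == "__main__":
    example = "<image><image><image>\nDescribe the video."
    print(drop_rightmost_image_token(example))
    # Prints "<image><image>\nDescribe the video." -- one token fewer.
```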