
Commit

Decode bytes input to utf-8 string before passing to vllm engine (#57)
rmccorm4 authored Oct 5, 2023
1 parent 512ba53 commit 2e29214
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion Quick_Deploy/vLLM/model_repository/vllm/1/model.py
@@ -167,6 +167,8 @@ async def generate(self, request):
         try:
             request_id = random_uuid()
             prompt = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()[0]
+            if isinstance(prompt, bytes):
+                prompt = prompt.decode("utf-8")
             stream = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()[0]

             # Request parameters are not yet supported via
@@ -184,7 +186,7 @@ async def generate(self, request):

             last_output = None
             async for output in self.llm_engine.generate(
-                str(prompt), sampling_params, request_id
+                prompt, sampling_params, request_id
             ):
                 if stream:
                     response_sender.send(self.create_response(output))
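
For context, Triton's Python backend typically materializes BYTES (string) input tensors as numpy object arrays whose elements are Python bytes, so indexing the PROMPT tensor yields bytes rather than str. The previous str(prompt) call would then produce a repr such as "b'hello'" instead of the actual text. Below is a minimal, self-contained sketch of the decoding pattern the commit applies; the decode_prompt helper and the sample prompt are illustrative, not part of the repository.

import numpy as np

def decode_prompt(prompt):
    # Triton BYTES tensors usually arrive as numpy object arrays of Python
    # bytes; decode to str so the vLLM engine receives plain text.
    # Note that str(b"hello") yields "b'hello'", which is why the old
    # str(prompt) call was replaced rather than kept.
    if isinstance(prompt, bytes):
        return prompt.decode("utf-8")
    return prompt

# Example: a PROMPT element as it might come back from as_numpy()
prompt_array = np.array([b"What is Triton Inference Server?"], dtype=object)
prompt = decode_prompt(prompt_array[0])
assert isinstance(prompt, str)
assert prompt == "What is Triton Inference Server?"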
