Skip to content

Commit

Permalink
refactor: Remove explicit callings to garbage collect (#55)
Browse files Browse the repository at this point in the history
Co-authored-by: Neelay Shah <[email protected]>
  • Loading branch information
kthui and nnshah1 authored Aug 6, 2024
1 parent 128abc3 commit a345a1d
Showing 1 changed file with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,6 @@ def response_loop(self):
if item is None:
break
response_sender, response, response_flag = item
del item
try:
response_sender.send(response, response_flag)
except Exception as e:
Expand All @@ -298,9 +297,6 @@ def response_loop(self):
finally:
if response_flag == pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL:
self.ongoing_request_count -= 1
del response_sender
if self.ongoing_request_count == 0:
gc.collect()

def create_response(self, vllm_output, prepend_input):
"""
Expand Down Expand Up @@ -447,9 +443,6 @@ async def generate(self, request):
finally:
if decrement_ongoing_request_count:
self.ongoing_request_count -= 1
del response_sender
if self.ongoing_request_count == 0:
gc.collect()

def verify_loras(self, request):
# We will check if the requested lora exists here, if not we will send a
Expand Down Expand Up @@ -527,3 +520,9 @@ def finalize(self):
if self._response_thread is not None:
self._response_thread.join()
self._response_thread = None

# When using parallel tensors, the stub process may not shut down due to
# unreleased references, so manually run the garbage collector once.
self.logger.log_info("[vllm] Running Garbage Collector on finalize...")
gc.collect()
self.logger.log_info("[vllm] Garbage Collector on finalize... done")

0 comments on commit a345a1d

Please sign in to comment.