Skip to content

Commit

Permalink
metric
Browse files Browse the repository at this point in the history
Signed-off-by: Cody Yu <[email protected]>
  • Loading branch information
comaniac committed Jan 10, 2025
1 parent aa1e77a commit 0df68e0
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
21 changes: 20 additions & 1 deletion vllm/v1/core/kv_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from vllm.logger import init_logger
from vllm.utils import cdiv
from vllm.v1.core.kv_cache_utils import (BlockHashType, FreeKVCacheBlockQueue,
KVCacheBlock,
KVCacheBlock, PrefixCachingMetrics,
generate_block_hash_extra_keys,
hash_block_tokens,
hash_request_tokens)
Expand Down Expand Up @@ -69,6 +69,12 @@ def __init__(
# is finished.
self.req_to_blocks: Dict[str, List[KVCacheBlock]] = {}

# Prefix cache metrics.
self.prefix_caching_metrics: PrefixCachingMetrics = {
"query_total": 0,
"query_hit": 0,
}

def get_computed_blocks(self, request: Request) -> List[KVCacheBlock]:
"""Get the computed (cached) blocks for the request.
Note that the computed blocks must be full.
Expand Down Expand Up @@ -101,6 +107,8 @@ def get_computed_blocks(self, request: Request) -> List[KVCacheBlock]:
else:
break

self.prefix_caching_metrics["query_total"] += len(block_hashes)
self.prefix_caching_metrics["query_hit"] += len(computed_blocks)
return computed_blocks

def append_slots(
Expand Down Expand Up @@ -328,6 +336,17 @@ def get_num_common_prefix_blocks(
break
return num_common_blocks

def get_prefix_caching_hit_rate(self) -> float:
    """Compute the prefix caching hit rate from the recorded counters.

    Returns:
        ``query_hit / query_total`` read from ``self.prefix_caching_metrics``,
        or ``0.0`` when ``query_total`` is still zero.
    """
    metrics = self.prefix_caching_metrics
    total = metrics["query_total"]
    # Nothing has been counted yet; avoid dividing by zero.
    if total == 0:
        return 0.0
    return metrics["query_hit"] / total

def _get_new_blocks(self, num_blocks: int) -> List[KVCacheBlock]:
"""Get new blocks from the free block pool.
Expand Down
12 changes: 11 additions & 1 deletion vllm/v1/core/kv_cache_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""KV-Cache Utilities."""
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Any, List, NamedTuple, Optional, Tuple
from typing import Any, List, NamedTuple, Optional, Tuple, TypedDict

from vllm.logger import init_logger
from vllm.v1.request import Request
Expand All @@ -24,6 +24,16 @@ class BlockHashType(NamedTuple):
extra_keys: Optional[Any] = None


class PrefixCachingMetrics(TypedDict):
    """Running counters used to compute the prefix caching hit rate.

    Both counters are measured in KV-cache blocks, not in requests: the
    KV cache manager adds the number of block hashes it looked up to
    ``query_total`` and the number of cached blocks it found to
    ``query_hit``. The hit rate is ``query_hit / query_total``.
    """

    query_total: int
    """Cumulative number of block hashes looked up in the prefix cache."""

    query_hit: int
    """Cumulative number of looked-up blocks that were found in the cache."""


@dataclass
class KVCacheBlock:
"""KV-cache block metadata."""
Expand Down
9 changes: 8 additions & 1 deletion vllm/v1/engine/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,17 @@ def _log_stats(self):
now = time.time()

if now - self._last_logging_time > LOGGING_TIME_S:
prefix_caching_hit_rate = ""
if (hit_rate := self.scheduler.kv_cache_manager.
get_prefix_caching_hit_rate()) > 0:
prefix_caching_hit_rate = (
f" | PrefixCachingHitRate: {hit_rate:.2f}")

logger.info(
"RUNNING: %s | WAITING: %s",
"RUNNING: %s | WAITING: %s%s",
len(self.scheduler.running),
len(self.scheduler.waiting),
prefix_caching_hit_rate,
)

self._last_logging_time = now
Expand Down

0 comments on commit 0df68e0

Please sign in to comment.