From e3a81db0bb67748b074159768dc92c9eb3d8e5e0 Mon Sep 17 00:00:00 2001
From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
Date: Mon, 7 Oct 2024 15:20:45 -0500
Subject: [PATCH] add scale_factor and bias to description zscore normalization
---
frigate/embeddings/__init__.py | 2 +-
frigate/embeddings/functions/minilm_l6_v2.py | 4 +--
frigate/embeddings/util.py | 14 ++++++++---
web/src/views/search/SearchView.tsx | 26 +++-----------------
4 files changed, 17 insertions(+), 29 deletions(-)
diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py
index 381d95ed19..aa7590994f 100644
--- a/frigate/embeddings/__init__.py
+++ b/frigate/embeddings/__init__.py
@@ -73,7 +73,7 @@ class EmbeddingsContext:
def __init__(self, db: SqliteVecQueueDatabase):
self.embeddings = Embeddings(db)
self.thumb_stats = ZScoreNormalization()
- self.desc_stats = ZScoreNormalization()
+ self.desc_stats = ZScoreNormalization(scale_factor=2.5, bias=0.5)
# load stats from disk
try:
diff --git a/frigate/embeddings/functions/minilm_l6_v2.py b/frigate/embeddings/functions/minilm_l6_v2.py
index a3a8b45b3a..5245edcdc9 100644
--- a/frigate/embeddings/functions/minilm_l6_v2.py
+++ b/frigate/embeddings/functions/minilm_l6_v2.py
@@ -46,7 +46,7 @@ def _download_model(self, path: str):
elif os.path.basename(path) == self.TOKENIZER_FILE:
logger.info("Downloading MiniLM tokenizer")
tokenizer = AutoTokenizer.from_pretrained(
- self.MODEL_NAME, clean_up_tokenization_spaces=False
+ self.MODEL_NAME, clean_up_tokenization_spaces=True
)
tokenizer.save_pretrained(path)
@@ -78,7 +78,7 @@ def _load_model_and_tokenizer(self):
def _load_tokenizer(self):
tokenizer_path = os.path.join(self.DOWNLOAD_PATH, self.TOKENIZER_FILE)
return AutoTokenizer.from_pretrained(
- tokenizer_path, clean_up_tokenization_spaces=False
+ tokenizer_path, clean_up_tokenization_spaces=True
)
def _load_model(self, path: str, providers: List[str]):
diff --git a/frigate/embeddings/util.py b/frigate/embeddings/util.py
index 7550716c93..0b2acd4d67 100644
--- a/frigate/embeddings/util.py
+++ b/frigate/embeddings/util.py
@@ -4,12 +4,15 @@
class ZScoreNormalization:
- """Running Z-score normalization for search distance."""
-
- def __init__(self):
+ def __init__(self, scale_factor: float = 1.0, bias: float = 0.0):
+ """Initialize with optional scaling and bias adjustments.
+ scale_factor multiplies each z-score, adjusting its magnitude;
+ bias is then added, artificially shifting the entire distribution upwards."""
self.n = 0
self.mean = 0
self.m2 = 0
+ self.scale_factor = scale_factor
+ self.bias = bias
@property
def variance(self):
@@ -23,7 +26,10 @@ def normalize(self, distances: list[float]):
self._update(distances)
if self.stddev == 0:
return distances
- return [(x - self.mean) / self.stddev for x in distances]
+ return [
+ (x - self.mean) / self.stddev * self.scale_factor + self.bias
+ for x in distances
+ ]
def _update(self, distances: list[float]):
for x in distances:
diff --git a/web/src/views/search/SearchView.tsx b/web/src/views/search/SearchView.tsx
index 7e77c20b8d..27090bb825 100644
--- a/web/src/views/search/SearchView.tsx
+++ b/web/src/views/search/SearchView.tsx
@@ -189,19 +189,9 @@ export default function SearchView({
// confidence score - probably needs tweaking
- const zScoreToConfidence = (score: number, source: string) => {
- let midpoint, scale;
-
- if (source === "thumbnail") {
- midpoint = 2;
- scale = 0.5;
- } else {
- midpoint = 0.5;
- scale = 1.5;
- }
-
+ const zScoreToConfidence = (score: number) => {
// Sigmoid function: 1 / (1 + e^x)
- const confidence = 1 / (1 + Math.exp((score - midpoint) * scale));
+ const confidence = 1 / (1 + Math.exp(score));
return Math.round(confidence * 100);
};
@@ -412,21 +402,13 @@ export default function SearchView({
) : (
)}
- {zScoreToConfidence(
- value.search_distance,
- value.search_source,
- )}
- %
+ {zScoreToConfidence(value.search_distance)}%
Matched {value.search_source} at{" "}
- {zScoreToConfidence(
- value.search_distance,
- value.search_source,
- )}
- %
+ {zScoreToConfidence(value.search_distance)}%