Skip to content

Commit

Permalink
Embedding gpu (#14253)
Browse files Browse the repository at this point in the history
  • Loading branch information
NickM-27 authored Oct 10, 2024
1 parent 9fda259 commit bc3a061
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 33 deletions.
11 changes: 0 additions & 11 deletions docker/main/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,6 @@ RUN /build_pysqlite3.sh
COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt

COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt


# Collect deps in a single layer
FROM scratch AS deps-rootfs
Expand Down Expand Up @@ -225,14 +222,6 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl

# We have to uninstall this dependency specifically
# as it will break onnxruntime-openvino
RUN pip3 uninstall -y onnxruntime

RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl

COPY --from=deps-rootfs / /

RUN ldconfig
Expand Down
3 changes: 0 additions & 3 deletions docker/main/requirements-wheels-post.txt

This file was deleted.

5 changes: 3 additions & 2 deletions docker/main/requirements-wheels.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ norfair == 2.2.*
setproctitle == 1.3.*
ws4py == 0.5.*
unidecode == 1.3.*
# OpenVino (ONNX installed in wheels-post)
# OpenVino & ONNX
openvino == 2024.3.*
onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
onnxruntime == 1.19.* ; platform_machine == 'aarch64'
# Embeddings
transformers == 4.45.*
onnx_clip == 4.0.*
# Generative AI
google-generativeai == 0.8.*
ollama == 0.3.*
Expand Down
6 changes: 6 additions & 0 deletions docs/docs/configuration/semantic_search.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ If you are enabling the Search feature for the first time, be advised that Friga

### Jina AI CLIP

:::tip

The CLIP models are downloaded in ONNX format, which means they will be accelerated using GPU hardware when available. Whether GPU acceleration is available depends on the Docker build that is used. See [the object detector docs](../configuration/object_detectors.md) for more information.

:::

The vision model is able to embed both images and text into the same vector space, which allows `image -> image` and `text -> image` similarity searches. Frigate uses this model on tracked objects to encode the thumbnail image and store it in the database. When searching for tracked objects via text in the search box, Frigate will perform a `text -> image` similarity search against this embedding. When clicking "Find Similar" in the tracked object detail pane, Frigate will perform an `image -> image` similarity search to retrieve the closest matching thumbnails.

The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
Expand Down
3 changes: 1 addition & 2 deletions frigate/embeddings/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def jina_vision_embedding_function(outputs):
},
embedding_function=jina_text_embedding_function,
model_type="text",
preferred_providers=["CPUExecutionProvider"],
force_cpu=True,
)

self.vision_embedding = GenericONNXEmbedding(
Expand All @@ -130,7 +130,6 @@ def jina_vision_embedding_function(outputs):
},
embedding_function=jina_vision_embedding_function,
model_type="vision",
preferred_providers=["CPUExecutionProvider"],
)

def _create_tables(self):
Expand Down
16 changes: 10 additions & 6 deletions frigate/embeddings/functions/onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from frigate.const import MODEL_CACHE_DIR, UPDATE_MODEL_STATE
from frigate.types import ModelStatusTypesEnum
from frigate.util.downloader import ModelDownloader
from frigate.util.model import get_ort_providers

warnings.filterwarnings(
"ignore",
Expand All @@ -40,16 +41,18 @@ def __init__(
download_urls: Dict[str, str],
embedding_function: Callable[[List[np.ndarray]], np.ndarray],
model_type: str,
preferred_providers: List[str] = ["CPUExecutionProvider"],
tokenizer_file: Optional[str] = None,
force_cpu: bool = False,
):
self.model_name = model_name
self.model_file = model_file
self.tokenizer_file = tokenizer_file
self.download_urls = download_urls
self.embedding_function = embedding_function
self.model_type = model_type # 'text' or 'vision'
self.preferred_providers = preferred_providers
self.providers, self.provider_options = get_ort_providers(
force_cpu=force_cpu, requires_fp16=True
)

self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
self.tokenizer = None
Expand Down Expand Up @@ -105,8 +108,7 @@ def _load_model_and_tokenizer(self):
else:
self.feature_extractor = self._load_feature_extractor()
self.session = self._load_model(
os.path.join(self.download_path, self.model_file),
self.preferred_providers,
os.path.join(self.download_path, self.model_file)
)

def _load_tokenizer(self):
Expand All @@ -123,9 +125,11 @@ def _load_feature_extractor(self):
f"{MODEL_CACHE_DIR}/{self.model_name}",
)

def _load_model(self, path: str, providers: List[str]):
def _load_model(self, path: str):
if os.path.exists(path):
return ort.InferenceSession(path, providers=providers)
return ort.InferenceSession(
path, providers=self.providers, provider_options=self.provider_options
)
else:
logger.warning(f"{self.model_name} model file {path} not found.")
return None
Expand Down
23 changes: 14 additions & 9 deletions frigate/util/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def get_ort_providers(
force_cpu: bool = False, openvino_device: str = "AUTO"
force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False
) -> tuple[list[str], list[dict[str, any]]]:
if force_cpu:
return (["CPUExecutionProvider"], [{}])
Expand All @@ -17,14 +17,19 @@ def get_ort_providers(
for provider in providers:
if provider == "TensorrtExecutionProvider":
os.makedirs("/config/model_cache/tensorrt/ort/trt-engines", exist_ok=True)
options.append(
{
"trt_timing_cache_enable": True,
"trt_engine_cache_enable": True,
"trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
"trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
}
)

if not requires_fp16 or os.environ.get("USE_FP_16", "True") != "False":
options.append(
{
"trt_fp16_enable": requires_fp16,
"trt_timing_cache_enable": True,
"trt_engine_cache_enable": True,
"trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
"trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
}
)
else:
options.append({})
elif provider == "OpenVINOExecutionProvider":
os.makedirs("/config/model_cache/openvino/ort", exist_ok=True)
options.append(
Expand Down

0 comments on commit bc3a061

Please sign in to comment.