diff --git a/.github/workflows/notebooks-cloud.yml b/.github/workflows/notebooks-cloud.yml
index f2bb5af2..4891acde 100644
--- a/.github/workflows/notebooks-cloud.yml
+++ b/.github/workflows/notebooks-cloud.yml
@@ -26,7 +26,7 @@ jobs:
- name: Set output variable (Make sure it is this quote format - "[path/to/notebook1.ipynb", "path/to/notebook2.ipynb]")
id: set_output
run: |
- notebooks=$(find docs/sphinx/source -name '*cloud.ipynb' ! -name 'pdf-retrieval-with-ColQwen2-vlm_Vespa-cloud.ipynb' ! -name 'mother-of-all-embedding-models-cloud.ipynb' ! -name 'scaling-personal-ai-assistants-with-streaming-mode-cloud.ipynb' ! -name 'colpali-benchmark-vqa-vlm_Vespa-cloud.ipynb' | jq -R -s -c 'split("\n")[:-1]')
+ notebooks=$(find docs/sphinx/source -name '*cloud.ipynb' ! -name 'pdf-retrieval-with-ColQwen2-vlm_Vespa-cloud.ipynb' ! -name 'mother-of-all-embedding-models-cloud.ipynb' ! -name 'scaling-personal-ai-assistants-with-streaming-mode-cloud.ipynb' ! -name 'colpali-benchmark-vqa-vlm_Vespa-cloud.ipynb' ! -name 'video_search_twelvelabs_cloud.ipynb' | jq -R -s -c 'split("\n")[:-1]')
# Print all notebooks echo
echo $notebooks
echo "notebooks=$notebooks" >> $GITHUB_OUTPUT
diff --git a/docs/sphinx/source/examples/video_search_twelvelabs_cloud.ipynb b/docs/sphinx/source/examples/video_search_twelvelabs_cloud.ipynb
new file mode 100644
index 00000000..61424efa
--- /dev/null
+++ b/docs/sphinx/source/examples/video_search_twelvelabs_cloud.ipynb
@@ -0,0 +1,3449 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "BXfYSnoaYyl4",
+ "metadata": {
+ "id": "BXfYSnoaYyl4"
+ },
+ "source": [
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "# Video Search and Retrieval with Vespa and TwelveLabs\n",
+ "\n",
+ "In this notebook, we demonstrate how to use the [TwelveLabs](https://www.twelvelabs.io/) `Marengo-retrieval-2.7` model, a state-of-the-art multimodal embedding model, to store video embeddings in Vespa.ai and run semantic search over them.\n",
+ "\n",
+ "The steps we will take in this notebook are:\n",
+ "\n",
+ "1. Setup and configuration\n",
+ "2. Generate Attributes and Embeddings for 3 sample videos using the TwelveLabs python SDK.\n",
+ "3. Deploy the Vespa application to Vespa Cloud and Feed the Data\n",
+ "4. Perform a semantic search with hybrid multi-phase ranking on the videos\n",
+ "5. Review the results\n",
+ "6. Cleanup\n",
+ "\n",
+ "All the steps that are needed to provision the Vespa application, including feeding the data, can be done by running this notebook.\n",
+ "We have tried to make this notebook easy to run, so that you can create your own video semantic search application using TwelveLabs models with Vespa.\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vespa-engine/pyvespa/blob/master/docs/sphinx/source/examples/video_search_twelvelabs_cloud.ipynb)\n",
+ "\n",
+ "## 1. Setup and Configuration\n",
+ "\n",
+ "For reference, this is the Python version used for this notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "C2muzvA8yyXFRL7zrDpEgpmJ",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "C2muzvA8yyXFRL7zrDpEgpmJ",
+ "outputId": "8d132f54-20e3-4f1d-f21b-ee666f07ff95",
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Python 3.11.11\n"
+ ]
+ }
+ ],
+ "source": [
+ "!python --version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "NPWxNjsXa5vd",
+ "metadata": {
+ "id": "NPWxNjsXa5vd"
+ },
+ "source": [
+ "### 1.1 Install libraries\n",
+ "\n",
+ "Install the required Python dependencies: the TwelveLabs Python SDK and the pyvespa Python API."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "OzBunhAMSMUF",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "OzBunhAMSMUF",
+ "outputId": "378d0d00-0f64-48d0-a570-567339e9be29"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Collecting pyvespa\n",
+ " Downloading pyvespa-0.53.0-py3-none-any.whl.metadata (18 kB)\n",
+ "Collecting vespacli\n",
+ " Downloading vespacli-8.476.30-py3-none-any.whl.metadata (15 kB)\n",
+ "Collecting twelvelabs\n",
+ " Downloading twelvelabs-0.4.4-py3-none-any.whl.metadata (16 kB)\n",
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from pyvespa) (2.32.3)\n",
+ "Requirement already satisfied: requests_toolbelt in /usr/local/lib/python3.11/dist-packages (from pyvespa) (1.0.0)\n",
+ "Collecting docker (from pyvespa)\n",
+ " Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from pyvespa) (3.1.5)\n",
+ "Requirement already satisfied: cryptography in /usr/local/lib/python3.11/dist-packages (from pyvespa) (43.0.3)\n",
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from pyvespa) (3.11.12)\n",
+ "Requirement already satisfied: httpx[http2] in /usr/local/lib/python3.11/dist-packages (from pyvespa) (0.28.1)\n",
+ "Requirement already satisfied: tenacity>=8.4.1 in /usr/local/lib/python3.11/dist-packages (from pyvespa) (9.0.0)\n",
+ "Requirement already satisfied: typing_extensions in /usr/local/lib/python3.11/dist-packages (from pyvespa) (4.12.2)\n",
+ "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.11/dist-packages (from pyvespa) (2.8.2)\n",
+ "Requirement already satisfied: fastcore>=1.7.8 in /usr/local/lib/python3.11/dist-packages (from pyvespa) (1.7.29)\n",
+ "Requirement already satisfied: lxml in /usr/local/lib/python3.11/dist-packages (from pyvespa) (5.3.0)\n",
+ "Requirement already satisfied: pydantic>=2.4.2 in /usr/local/lib/python3.11/dist-packages (from twelvelabs) (2.10.6)\n",
+ "Requirement already satisfied: numpy>=1.23.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (1.26.4)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from fastcore>=1.7.8->pyvespa) (24.2)\n",
+ "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx[http2]->pyvespa) (3.7.1)\n",
+ "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx[http2]->pyvespa) (2025.1.31)\n",
+ "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx[http2]->pyvespa) (1.0.7)\n",
+ "Requirement already satisfied: idna in /usr/local/lib/python3.11/dist-packages (from httpx[http2]->pyvespa) (3.10)\n",
+ "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx[http2]->pyvespa) (0.14.0)\n",
+ "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.4.2->twelvelabs) (0.7.0)\n",
+ "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.4.2->twelvelabs) (2.27.2)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil->pyvespa) (1.17.0)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (2.4.4)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (1.3.2)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (25.1.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (1.5.0)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (6.1.0)\n",
+ "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (0.2.1)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->pyvespa) (1.18.3)\n",
+ "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.11/dist-packages (from cryptography->pyvespa) (1.17.1)\n",
+ "Requirement already satisfied: urllib3>=1.26.0 in /usr/local/lib/python3.11/dist-packages (from docker->pyvespa) (2.3.0)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->pyvespa) (3.4.1)\n",
+ "Collecting h2<5,>=3 (from httpx[http2]->pyvespa)\n",
+ " Downloading h2-4.2.0-py3-none-any.whl.metadata (5.1 kB)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->pyvespa) (3.0.2)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.12->cryptography->pyvespa) (2.22)\n",
+ "Collecting hyperframe<7,>=6.1 (from h2<5,>=3->httpx[http2]->pyvespa)\n",
+ " Downloading hyperframe-6.1.0-py3-none-any.whl.metadata (4.3 kB)\n",
+ "Collecting hpack<5,>=4.1 (from h2<5,>=3->httpx[http2]->pyvespa)\n",
+ " Downloading hpack-4.1.0-py3-none-any.whl.metadata (4.6 kB)\n",
+ "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx[http2]->pyvespa) (1.3.1)\n",
+ "Downloading pyvespa-0.53.0-py3-none-any.whl (96 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m96.8/96.8 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading vespacli-8.476.30-py3-none-any.whl (50.5 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.5/50.5 MB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading twelvelabs-0.4.4-py3-none-any.whl (32 kB)\n",
+ "Downloading docker-7.1.0-py3-none-any.whl (147 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.8/147.8 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading h2-4.2.0-py3-none-any.whl (60 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.0/61.0 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading hpack-4.1.0-py3-none-any.whl (34 kB)\n",
+ "Downloading hyperframe-6.1.0-py3-none-any.whl (13 kB)\n",
+ "Installing collected packages: vespacli, hyperframe, hpack, h2, docker, twelvelabs, pyvespa\n",
+ "Successfully installed docker-7.1.0 h2-4.2.0 hpack-4.1.0 hyperframe-6.1.0 pyvespa-0.53.0 twelvelabs-0.4.4 vespacli-8.476.30\n"
+ ]
+ }
+ ],
+ "source": [
+ "# twelvelabs is pinned to the SDK version this notebook was last run with (see the recorded output);\n",
+ "# the cells below rely on that version's client API.\n",
+ "!pip3 install pyvespa vespacli twelvelabs==0.4.4 pandas"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8veAJGipbjVA",
+ "metadata": {
+ "id": "8veAJGipbjVA"
+ },
+ "source": [
+ "Import all the required packages in this notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "Ojdxaw85h9tV",
+ "metadata": {
+ "id": "Ojdxaw85h9tV"
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import hashlib\n",
+ "import json\n",
+ "from datetime import datetime\n",
+ "from getpass import getpass\n",
+ "\n",
+ "from vespa.package import (\n",
+ "    ApplicationPackage,\n",
+ "    Field,\n",
+ "    Schema,\n",
+ "    Document,\n",
+ "    HNSW,\n",
+ "    RankProfile,\n",
+ "    FieldSet,\n",
+ "    SecondPhaseRanking,\n",
+ "    Function,\n",
+ ")\n",
+ "\n",
+ "from vespa.deployment import VespaCloud\n",
+ "from vespa.io import VespaResponse, VespaQueryResponse\n",
+ "\n",
+ "from twelvelabs import TwelveLabs\n",
+ "from twelvelabs.models.embed import EmbeddingsTask\n",
+ "from twelvelabs.models.task import Task\n",
+ "\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "Ukp_0PzHXAf-",
+ "metadata": {
+ "id": "Ukp_0PzHXAf-"
+ },
+ "source": [
+ "### 1.2 Get a TwelveLabs API key\n",
+ "[Sign-up](https://auth.twelvelabs.io/u/signup) for TwelveLabs.\n",
+ "\n",
+ "After logging in, navigate to your profile and get your [API key](https://playground.twelvelabs.io/dashboard/api-key). Copy it and paste it below.\n",
+ "\n",
+ "The Free plan includes indexing of 600 mins of videos, which should be sufficient to explore the capabilities of the API."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7e381016",
+ "metadata": {
+ "id": "7e381016"
+ },
+ "outputs": [],
+ "source": [
+ "# Read the key from the environment when available; otherwise prompt with getpass so the\n",
+ "# secret is not echoed into the cell output that gets saved with the notebook.\n",
+ "TL_API_KEY = os.getenv(\"TL_API_KEY\") or getpass(\"Enter your TL_API key: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "AfCgIhMPXnQQ",
+ "metadata": {
+ "id": "AfCgIhMPXnQQ"
+ },
+ "source": [
+ "### 1.3 Sign-up for a Vespa Trial Account\n",
+ "\n",
+ "**Pre-requisite**:\n",
+ "- Spin-up a Vespa Cloud [Trial](https://vespa.ai/free-trial) account.\n",
+ "- Login to the account you just created and create a tenant at [console.vespa-cloud.com](https://console.vespa-cloud.com/).\n",
+ "- Save the tenant name."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "B7uKLKsrYQgT",
+ "metadata": {
+ "id": "B7uKLKsrYQgT"
+ },
+ "source": [
+ "### 1.4 Setup the tenant name and the application name\n",
+ "\n",
+ "- Paste your tenant name below.\n",
+ "- Give your application a name. Note that the name cannot have `-` or `_`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "zJlBCg6ahzJM",
+ "metadata": {
+ "id": "zJlBCg6ahzJM"
+ },
+ "outputs": [],
+ "source": [
+ "# Replace with your tenant name from the Vespa Cloud Console\n",
+ "tenant_name = \"vespa-team\"\n",
+ "# Replace with your application name (does not need to exist yet)\n",
+ "application = \"videosearch\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "Ga0DUN47dLjK",
+ "metadata": {
+ "id": "Ga0DUN47dLjK"
+ },
+ "source": [
+ "## 2. Generate Attributes and Embeddings for sample videos using TwelveLabs Embedding API\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7jXol6MHVy1i",
+ "metadata": {
+ "id": "7jXol6MHVy1i"
+ },
+ "source": [
+ "### 2.1 Generate attributes on the videos\n",
+ "\n",
+ "In this section, we will leverage the [Pegasus 1.1](https://docs.twelvelabs.io/docs/video-understanding-models) generative model to generate some attributes about our videos to store as part of the searchable information in Vespa. Attributes we want to store as part of the videos include:\n",
+ "\n",
+ "- Keywords\n",
+ "- Summaries\n",
+ "\n",
+ "For video samples, we are selecting the 3 videos in the array below from the [Internet Archive](https://archive.org/).\n",
+ "\n",
+ "You can customize this code with the urls of your choice. Note that there are certain restrictions such as the resolution of the videos.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "eQkLI-moZPhL",
+ "metadata": {
+ "id": "eQkLI-moZPhL"
+ },
+ "outputs": [],
+ "source": [
+ "VIDEO_URLs = [\n",
+ " \"https://ia801503.us.archive.org/27/items/hide-and-seek-with-giant-jenny/HnVideoEditor_2022_10_29_205557707.ia.mp4\",\n",
+ " \"https://ia601401.us.archive.org/1/items/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net.mp4\",\n",
+ " \"https://dn720401.ca.archive.org/0/items/mr-bean-the-animated-series-holiday-for-teddy/S2E12.ia.mp4\",\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "yZa-YDeNZ63r",
+ "metadata": {
+ "id": "yZa-YDeNZ63r"
+ },
+ "source": [
+ "In order to generate text on the videos, the prerequisite is to upload the videos and index them. Let's first create an index below:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "elAj0cm1Upaa",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "elAj0cm1Upaa",
+ "outputId": "d33a63af-37ec-47a0-fccc-23465cbcec43"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating Index:Vespa_1739405261\n",
+ "Created index: id=67ad37cd97c35dab7dd8a8e1 name=Vespa_1739405261 models=root=[Model(name='pegasus1.2', options=['visual', 'audio'], addons=None, finetuned=False)]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Open a TwelveLabs session\n",
+ "client = TwelveLabs(api_key=TL_API_KEY)\n",
+ "\n",
+ "# Derive the index name from the current Unix timestamp\n",
+ "timestamp = int(datetime.now().timestamp())\n",
+ "index_name = f\"Vespa_{timestamp}\"\n",
+ "\n",
+ "# Model configuration passed to the index: Pegasus with the visual and audio options\n",
+ "pegasus_model = {\"name\": \"pegasus1.1\", \"options\": [\"visual\", \"audio\"]}\n",
+ "\n",
+ "# Create the index and report its id, name and models\n",
+ "print(f\"Creating Index:{index_name}\")\n",
+ "index = client.index.create(\n",
+ "    name=index_name,\n",
+ "    models=[pegasus_model],\n",
+ "    addons=[\"thumbnail\"],  # Optional\n",
+ ")\n",
+ "print(f\"Created index: id={index.id} name={index.name} models={index.models}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ylbZo766ayig",
+ "metadata": {
+ "id": "ylbZo766ayig"
+ },
+ "source": [
+ "We can now upload the videos:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "oEKeONmX7ffB",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oEKeONmX7ffB",
+ "outputId": "af2648d9-7751-4533-a73e-bb35830d1015"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Task created successfully! Task ID: 67ad37e697c35dab7dd8a8e2\n",
+ " Status=pending\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=ready\n",
+ "Indexing done: Task(id='67ad37e697c35dab7dd8a8e2', created_at='2025-02-13T00:08:06.249Z', updated_at='2025-02-13T00:08:06.249Z', index_id='67ad37cd97c35dab7dd8a8e1', video_id='67ad37e7866172db0eba23b2', status='ready', system_metadata={'filename': 'HnVideoEditor_2022_10_29_205557707.ia', 'duration': 221.9666671, 'width': 854, 'height': 480}, hls=None)\n",
+ "Uploaded https://ia801503.us.archive.org/27/items/hide-and-seek-with-giant-jenny/HnVideoEditor_2022_10_29_205557707.ia.mp4. The unique identifer of your video is 67ad37e7866172db0eba23b2.\n",
+ "Task created successfully! Task ID: 67ad383997c35dab7dd8a8e5\n",
+ " Status=pending\n",
+ " Status=pending\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=ready\n",
+ "Indexing done: Task(id='67ad383997c35dab7dd8a8e5', created_at='2025-02-13T00:09:29.653Z', updated_at='2025-02-13T00:09:29.653Z', index_id='67ad37cd97c35dab7dd8a8e1', video_id='67ad384054593ebe010b776a', status='ready', system_metadata={'filename': 'twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net', 'duration': 1448.8000001, 'width': 640, 'height': 480}, hls=None)\n",
+ "Uploaded https://ia601401.us.archive.org/1/items/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net.mp4. The unique identifer of your video is 67ad384054593ebe010b776a.\n",
+ "Task created successfully! Task ID: 67ad38e3b6823c6f8378023b\n",
+ " Status=pending\n",
+ " Status=pending\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=indexing\n",
+ " Status=ready\n",
+ "Indexing done: Task(id='67ad38e3b6823c6f8378023b', created_at='2025-02-13T00:12:19.846Z', updated_at='2025-02-13T00:12:19.846Z', index_id='67ad37cd97c35dab7dd8a8e1', video_id='67ad38e6866172db0eba23b4', status='ready', system_metadata={'filename': 'S2E12.ia', 'duration': 659.9200001, 'width': 854, 'height': 480}, hls=None)\n",
+ "Uploaded https://dn720401.ca.archive.org/0/items/mr-bean-the-animated-series-holiday-for-teddy/S2E12.ia.mp4. The unique identifer of your video is 67ad38e6866172db0eba23b4.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Capturing index id for upload\n",
+ "index_id = index.id\n",
+ "\n",
+ "\n",
+ "def on_task_update(task: Task):\n",
+ "    \"\"\"Progress callback passed to `wait_for_done`: print the indexing task's current status.\"\"\"\n",
+ "    print(f\" Status={task.status}\")\n",
+ "\n",
+ "\n",
+ "for video_url in VIDEO_URLs:\n",
+ "    # Create a video indexing task\n",
+ "    task = client.task.create(index_id=index_id, url=video_url, language=\"en\")\n",
+ "    print(f\"Task created successfully! Task ID: {task.id}\")\n",
+ "    # Wait for the task to finish, reporting progress through the callback\n",
+ "    status = task.wait_for_done(sleep_interval=10, callback=on_task_update)\n",
+ "    print(f\"Indexing done: {status}\")\n",
+ "    # Any final status other than \"ready\" is treated as a failed upload\n",
+ "    if task.status != \"ready\":\n",
+ "        raise RuntimeError(f\"Indexing failed with status {task.status}\")\n",
+ "    print(\n",
+ "        f\"Uploaded {video_url}. The unique identifier of your video is {task.video_id}.\"\n",
+ "    )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "yakP4RZabFxR",
+ "metadata": {
+ "id": "yakP4RZabFxR"
+ },
+ "source": [
+ "Now that the videos have been uploaded, we can generate the keywords and summaries for the videos below. Notice in the output that the video uploaded last is processed first in this stage. This matters because the other video attributes (e.g. URLs, titles) are stored in separate arrays that must follow the same order."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "z0m24cYj9-FC",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "z0m24cYj9-FC",
+ "outputId": "1f3518ca-b7db-44b9-d58f-6fa70b1bc4a5"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Generating text for 67ad38e6866172db0eba23b4\n",
+ "Summary: The video showcases a series of comedic and heartwarming scenes featuring Mr. Bean, a well-known character known for his slapstick humor and innocent charm. The story begins with Mr. Bean preparing for a holiday, holding a flashlight and a teddy bear in a room that appears to be his temporary accommodation. An elderly woman briefly interacts with him through a door, and the scene transitions to a rainy day outside a two-story red brick house. Inside, Mr. Bean is seen taking photos of his armchair with a teddy bear sitting on it, pointing to pictures on the wall, and gathering money from a piggy bank to pack in his suitcase alongside other essentials.\n",
+ "The plot then shifts to Mr. Bean driving down a coastal road towards Seaview Hotel, where he arrives with his luggage and the teddy bear. At the hotel, he encounters a little girl and attempts to give her the teddy bear, which initially fails due to her reluctance but eventually succeeds when he offers her additional toys. This interaction, however, upsets a large man at the reception desk, but Mr. Bean remains oblivious and continues with his cheerful demeanor.\n",
+ "The video then transitions to a beach setting where Mr. Bean is seen reading a magazine about sharks, and a larger man pushes a cart full of beach items past him. A young girl runs up to Mr. Bean, asking if this is what he meant by a beach. Mr. Bean explains that it is indeed a beach but not the typical one with many sharks, and the larger man humorously claims to have been attacked by sharks multiple times that day. The scene then moves to the hotel restaurant where Mr. Bean orders fish and chips, unaware that the little girl has returned and is watching him sadly from behind.\n",
+ "A dramatic moment occurs when the little girl suddenly cries out, causing Mr. Bean to react and call for help, thinking it's an emergency. The video then shifts to a scene at Dodgems amusement park where Mr. Bean and the little girl are in separate dodgem cars. Mr. Bean reaches out and grabs the teddy bear from the little girl, causing her to cry and the large man to become upset. Mr. Bean, however, remains oblivious and continues to enjoy the ride, much to the amusement of the audience.\n",
+ "The video concludes with Mr. Bean standing alone on a pier, gazing pensively at the sea, and holding the teddy bear closely. He then returns to his room, examining photos of the little girl and the teddy bear, and attempts to merge these photos together, symbolizing his connection with the little girl and the teddy bear. The video ends with the credits rolling, leaving a lasting impression of Mr. Bean's innocent and humorous adventures.\n",
+ "Throughout the video, Mr. Bean's character is portrayed as a lovable and somewhat naive figure who often finds himself in humorous situations, particularly when interacting with the little girl and the teddy bear. The video creator likely intended to evoke laughter and a sense of warmth through Mr. Bean's antics and the interactions with other characters.\n",
+ "Open-ended Text: MrBean, Holiday, TeddyBear, Beach, Hotel\n",
+ "Generating text for 67ad384054593ebe010b776a\n",
+ "Summary: The video is an animated adaptation of \"Twas the Night Before Christmas,\" narrated and sung by Joel Grey, with additional voices from Tammy Grimes, John McGiver, and George Gobel. The story unfolds in a snowy town where the night before Christmas is marked by a peculiar series of events that threaten to disrupt the holiday spirit.\n",
+ "The narrative begins with a snowy night scene, where a grandfather clock labeled \"Clock Maker\" stands outside a house. Inside the Clockmaker's workshop, tools and equipment are covered in cobwebs, hinting at the quiet and stillness of the night. The camera then shifts to a bedroom where two children lie asleep under pink blankets, with notes pinned near the window addressed to Santa Claus, listing their Christmas wishes.\n",
+ "One child, reading a book titled \"Twas The Night Before Christmas,\" falls asleep as a shadowy figure appears at the foot of the bed. The scene transitions to a mouse family's home beneath the human family's bedroom, where a young boy mouse is seen lying awake in bed, holding a book open. He looks up towards something unseen off-screen before closing his eyes again.\n",
+ "The mouse family, like the human family, is preparing for Christmas. A postman delivers mail amidst autumnal scenery, and inside their home, the mice are surprised to find letters addressed to Santa Claus, marked 'Not Acceptable By The North Address'. This leads to a town-wide concern as people gather around a town hall, waving signs and advocating for letter delivery to Santa.\n",
+ "The story then introduces Joshua Trundle, a clockmaker who believes he can mend the rift with Santa by creating a special clock that plays a song welcoming Santa on Christmas Eve. Despite initial skepticism, the town authorizes the construction of the clock, hoping it will make Santa happy again.\n",
+ "However, Albert, Joshua's son, has written a letter to the editor criticizing Santa Claus, which has angered Santa and led to the return of all the children's letters. Albert feels guilty and decides to fix the clock by midnight, hoping to make amends and bring back Santa's goodwill.\n",
+ "As Christmas Eve approaches, the town is filled with anticipation and anxiety. The clockmaker's clock fails to work, and the town prepares for a Christmas without Santa. The video captures the emotional turmoil of the families, especially the Trundles, who are struggling financially and are unsure if they will have a Christmas at all.\n",
+ "The climax of the video is a heartfelt plea from the characters, emphasizing the importance of believing in miracles and the need to help make them happen. The song \"Even a Miracle Needs a Hand\" underscores the theme of community effort and the power of belief.\n",
+ "Finally, as the clock strikes midnight, Santa Claus appears, flying over the town with his sleigh and reindeer. The children watch in awe as Santa fills their stockings and leaves gifts. The video ends with the classic lines from \"Twas the Night Before Christmas,\" and the final scene shows the town covered in snow, with the clock tower in the background, as the credits roll.\n",
+ "Throughout the video, the animation style is whimsical and charming, with a focus on the interplay between the human and mouse families. The narrative is enriched by the musical elements, which include renditions of \"Twas the Night Before Christmas\" and original songs that capture the spirit of the holiday season. The video aims to convey the importance of belief, community, and the magic of Christmas, making it a heartwarming and engaging adaptation of the classic poem.\n",
+ "Open-ended Text: Christmas, Clockmaker, Mice, Santa Claus, Snowfall\n",
+ "Generating text for 67ad37e7866172db0eba23b2\n",
+ "Summary: The video opens with a whimsical scene where a panda holding a green apple emerges from behind a yellow triangle, followed by the appearance of various shapes and three cartoon wolves, each with distinct characteristics: one wearing glasses and a yellow dress, another with a pink bow and a diaper, and the last one gray with a red shirt. This sets the tone for a playful and imaginative narrative.\n",
+ "The story then transitions to a solid cyan backdrop where two cartoon dogs dressed as waiters are startled by the sudden descent of a giant fox, causing them to drop their items and fall backward. This segment introduces a sense of surprise and chaos, which is a recurring theme throughout the video.\n",
+ "Inside a room with purple walls adorned with flower patterns, a television set displays animated content featuring anthropomorphic animals. Three cartoon wolves are watching TV when a small alien character enters the scene, causing the wolves to react with surprise and fear. This moment adds an element of mystery and introduces the alien as a significant character.\n",
+ "The narrative then shifts to a street view on the TV where a robot-like figure chases after cars. After this segment, the camera zooms out to show all four characters sitting together again. The perspective then changes to reveal a cardboard box containing craft supplies, and two hands begin assembling a miniature model of a colorful town. This town is made entirely of painted cardboard boxes, complete with toy vehicles like buses and cars moving around within its boundaries. The creation of this town is a highlight of the video, showcasing creativity and imagination.\n",
+ "The three cartoon wolves express excitement over the completed town and engage in hide-and-seek games among themselves, utilizing different parts of the constructed townscape. This segment is filled with playful interactions and the joy of discovery, as the wolves successfully hide and are eventually found.\n",
+ "The video then introduces a pink cartoon dog wearing a diaper, crawling across a road between colorful buildings. A small brown wolf pup looks out the window, expressing concern. The scene shifts back to the street where the large dog is now holding the smaller wolf pup, looking around worriedly. This moment introduces a subplot of care and concern between the characters.\n",
+ "In a miniature cityscape setting, the larger dog chases after toy cars driven by other toys, including the smaller wolf pup. Two aliens peek through a red building's arched windows, observing the situation below. The smaller wolf pup stands alone, appearing thoughtful before noticing its bottle falling from above and catching it just in time. This sequence of events adds layers of complexity to the narrative, blending care, playfulness, and a touch of danger.\n",
+ "The larger dog runs towards the camera, passing behind two toy police cars parked near the curb. The smaller wolf pup watches as the larger dog approaches her again, and the larger dog continues running down the street, seemingly searching for someone. Inside the playroom, the larger dog sits amidst scattered blocks, watching over the alien and another gray wolf pup who appear distressed. The green alien drives away in a red convertible car, leaving the gray wolf pup behind. The larger dog pursues the fleeing vehicle, which has picked up the gray wolf pup inside, expressing surprise upon seeing the driver change seats with the passenger during the journey. The car passes a rainbow-colored bus stop sign, indicating that the chase leads beyond the initial town area.\n",
+ "The video concludes with the characters standing together under a bridge, relieved to be reunited. They express gratitude to each other for helping one another throughout the day, bringing a sense of closure and camaraderie to the narrative.\n",
+ "Overall, the video is a delightful blend of imagination, creativity, and playful interactions, centered around the characters' adventures in a colorful and whimsical world.\n",
+ "Open-ended Text: Wolf, Cardboard, Alien, Hide-and-seek, GiantFox\n"
+ ]
+ }
+ ],
+ "source": [
+    "client = TwelveLabs(api_key=TL_API_KEY)\n",
+    "\n",
+    "# Prompt used to ask for SEO keywords; identical for every video\n",
+    "KEYWORDS_PROMPT = (\n",
+    "    \"Based on this video, I want to generate five keywords for SEO (Search Engine Optimization). \"\n",
+    "    \"Provide just the keywords as a comma delimited list without any additional text.\"\n",
+    ")\n",
+    "\n",
+    "# Both lists are filled in the order the index returns its videos\n",
+    "summaries = []\n",
+    "keywords_array = []\n",
+    "\n",
+    "videos = client.index.video.list(index_id)\n",
+    "for video in videos:\n",
+    "    video_id = video.id\n",
+    "    print(f\"Generating text for {video_id}\")\n",
+    "\n",
+    "    # Summary of the whole video\n",
+    "    summary_res = client.generate.summarize(video_id=video_id, type=\"summary\")\n",
+    "    summary_text = summary_res.summary\n",
+    "    print(f\"Summary: {summary_text}\")\n",
+    "    summaries.append(summary_text)\n",
+    "\n",
+    "    # Open-ended generation, used here to obtain a keyword list\n",
+    "    keywords = client.generate.text(video_id=video_id, prompt=KEYWORDS_PROMPT)\n",
+    "    keywords_text = keywords.data\n",
+    "    print(f\"Open-ended Text: {keywords_text}\")\n",
+    "    keywords_array.append(keywords_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "VBebeBFcc7vx",
+ "metadata": {
+ "id": "VBebeBFcc7vx"
+ },
+ "source": [
+ "We need to store the titles of the videos as an additional attribute."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "sPliZOSZU3em",
+ "metadata": {
+ "id": "sPliZOSZU3em"
+ },
+ "outputs": [],
+ "source": [
+ "# Creating array with titles\n",
+ "titles = [\n",
+ " \"Mr. Bean the Animated Series Holiday for Teddy\",\n",
+ " \"Twas the night before Christmas\",\n",
+ " \"Hide and Seek with Giant Jenny\",\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c4aJ2Rr5fDD8",
+ "metadata": {
+ "id": "c4aJ2Rr5fDD8"
+ },
+ "source": [
+ "## 2.2 Generate Embeddings\n",
+ "\n",
+ "The following code leverages the [Embed API](https://docs.twelvelabs.io/docs/create-video-embeddings) to create an asynchronous embedding task to embed the sample videos.\n",
+ "\n",
+ "Twelve Labs video embeddings capture all the subtle cues and interactions between different modalities, including the visual expressions, body language, spoken words, and the overall context of the video, encapsulating the essence of all these modalities and their interrelations over time.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "qm2DXkatR1pP",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "qm2DXkatR1pP",
+ "outputId": "2dfda058-d691-4c17-8431-37ed49fcfc73"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created task: id=67ad39e5b6823c6f83780246 model_name=Marengo-retrieval-2.7 status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=ready\n",
+ "Embedding done: ready\n",
+ "Created task: id=67ad3a1ab6823c6f8378024c model_name=Marengo-retrieval-2.7 status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=ready\n",
+ "Embedding done: ready\n",
+ "Created task: id=67ad3aafb6823c6f83780256 model_name=Marengo-retrieval-2.7 status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=processing\n",
+ " Status=ready\n",
+ "Embedding done: ready\n"
+ ]
+ }
+ ],
+ "source": [
+    "# One embedding task is created per video URL; each task ID is kept (as a string)\n",
+    "# so the results can be looked up again afterwards.\n",
+    "client = TwelveLabs(api_key=TL_API_KEY)\n",
+    "task_ids = []\n",
+    "\n",
+    "for url in VIDEO_URLs:\n",
+    "    task = client.embed.task.create(\n",
+    "        model_name=\"Marengo-retrieval-2.7\",\n",
+    "        video_url=url,\n",
+    "    )\n",
+    "    print(f\"Created task: id={task.id} model_name={task.model_name} status={task.status}\")\n",
+    "    task_ids.append(str(task.id))\n",
+    "\n",
+    "    # Wait for this task before starting the next one; on_task_update is called while waiting\n",
+    "    status = task.wait_for_done(sleep_interval=10, callback=on_task_update)\n",
+    "    print(f\"Embedding done: {status}\")\n",
+    "\n",
+    "    # Stop right away if the task ended in anything other than \"ready\"\n",
+    "    if task.status != \"ready\":\n",
+    "        raise RuntimeError(f\"Embedding failed with status {task.status}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6R198-KVq9if",
+ "metadata": {
+ "id": "6R198-KVq9if"
+ },
+ "source": [
+ "## 2.3 Retrieve Embeddings\n",
+ "\n",
+ "Once the embedding task is completed, we can retrieve the results of the embedding task based on the task_ids."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "9By4UdCgGChw",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "9By4UdCgGChw",
+ "outputId": "26daf14d-e51d-4b2b-e86d-892d6c22bdb1"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Task ID: 67ad39e5b6823c6f83780246\n",
+ "Status: ready\n",
+ "Task ID: 67ad3a1ab6823c6f8378024c\n",
+ "Status: ready\n",
+ "Task ID: 67ad3aafb6823c6f83780256\n",
+ "Status: ready\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Spin-up session\n",
+    "client = TwelveLabs(api_key=TL_API_KEY)\n",
+    "\n",
+    "# Retrieved task objects, kept in the same order as task_ids\n",
+    "tasks = []\n",
+    "\n",
+    "for task_id in task_ids:\n",
+    "    task = client.embed.task.retrieve(task_id)\n",
+    "    tasks.append(task)\n",
+    "    # Show which task was fetched and its current status\n",
+    "    print(f\"Task ID: {task.id}\")\n",
+    "    print(f\"Status: {task.status}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "kpuVxCshxoD3",
+ "metadata": {
+ "id": "kpuVxCshxoD3"
+ },
+ "source": [
+ "We can now review the output structure of the first segment for each one of these videos. This output will help us define the schema to store the embeddings in Vespa in the second part of this notebook.\n",
+ "\n",
+ "From looking at this output, the video has been embedded into chunks of 6 seconds each (default configurable value in the Embed API). Each embedding has a float vector of dimension 1024.\n",
+ "\n",
+    "The number of segments generated varies per video, based on the length of the videos, ranging from 37 to 242 segments."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "4iyjmzpYsRUz",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4iyjmzpYsRUz",
+ "outputId": "222e3307-2b28-41a3-de1c-1b45a04f183d"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "67ad39e5b6823c6f83780246\n",
+ "start_offset_sec: float : 0.0\n",
+ "end_offset_sec: float : 6.0\n",
+ "embedding_scope: str : clip\n",
+ "embeddings_float: list of size 1024 (truncated to 5 items): [0.030361895, 0.008698823, -0.0048321243, -0.019013105, -0.011488311] \n",
+ "Total Number of segments: 37\n",
+ "67ad3a1ab6823c6f8378024c\n",
+ "start_offset_sec: float : 0.0\n",
+ "end_offset_sec: float : 6.0\n",
+ "embedding_scope: str : clip\n",
+ "embeddings_float: list of size 1024 (truncated to 5 items): [0.024328815, -0.0035867887, 0.016065866, 0.02501548, 0.007778642] \n",
+ "Total Number of segments: 242\n",
+ "67ad3aafb6823c6f83780256\n",
+ "start_offset_sec: float : 0.0\n",
+ "end_offset_sec: float : 6.0\n",
+ "embedding_scope: str : clip\n",
+ "embeddings_float: list of size 1024 (truncated to 5 items): [0.04080625, 0.0086980555, 0.00096186635, -0.00607, -0.020250283] \n",
+ "Total Number of segments: 110\n"
+ ]
+ }
+ ],
+ "source": [
+    "for task in tasks:\n",
+    "    segments = task.video_embedding.segments\n",
+    "    print(task.id)\n",
+    "    # Describe each field of the first segment: lists are shortened, other values are shown with their type\n",
+    "    for key, value in segments[0]:\n",
+    "        if not isinstance(value, list):\n",
+    "            print(f\"{key}: {type(value).__name__} : {value}\")\n",
+    "            continue\n",
+    "        print(f\"{key}: list of size {len(value)} (truncated to 5 items): {value[:5]} \")\n",
+    "    print(f\"Total Number of segments: {len(segments)}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1iscs7g-5xOD",
+ "metadata": {
+ "id": "1iscs7g-5xOD"
+ },
+ "source": [
+ "# 3. Deploy a Vespa Application\n",
+ "\n",
+ "At this point, we are ready to deploy a Vespa Application. We have generated the attributes we needed on each video, as well as the embeddings."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "hbZ-pJO37-1O",
+ "metadata": {
+ "id": "hbZ-pJO37-1O"
+ },
+ "source": [
+ "## 3.1 Create an Application Package\n",
+ "\n",
+ "The [application package](https://pyvespa.readthedocs.io/en/latest/reference-api.html#vespa.package.ApplicationPackage)\n",
+ "has all the Vespa configuration files -\n",
+ "create one from scratch:\n",
+ "\n",
+    "The Vespa schema deployed as part of the package is called `videos`. All the fields match the output of the TwelveLabs Embed API above. Refer to the [Vespa documentation](https://docs.vespa.ai/en/reference/schema-reference.html) for more information on the schema specification.\n",
+ "\n",
+ "We can first define the schema using pyvespa"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "T941tSJOiDCx",
+ "metadata": {
+ "id": "T941tSJOiDCx"
+ },
+ "outputs": [],
+ "source": [
+    "# Schema `videos`: one document per video, holding text metadata plus one\n",
+    "# embedding vector (and start/end offset) per embedded segment.\n",
+    "videos_schema = Schema(\n",
+    "    name=\"videos\",\n",
+    "    document=Document(\n",
+    "        fields=[\n",
+    "            Field(name=\"video_url\", type=\"string\", indexing=[\"summary\"]),\n",
+    "            # Text metadata, indexed with BM25 enabled for lexical matching\n",
+    "            Field(\n",
+    "                name=\"title\",\n",
+    "                type=\"string\",\n",
+    "                indexing=[\"index\", \"summary\"],\n",
+    "                match=[\"text\"],\n",
+    "                index=\"enable-bm25\",\n",
+    "            ),\n",
+    "            Field(\n",
+    "                name=\"keywords\",\n",
+    "                type=\"string\",\n",
+    "                indexing=[\"index\", \"summary\"],\n",
+    "                match=[\"text\"],\n",
+    "                index=\"enable-bm25\",\n",
+    "            ),\n",
+    "            Field(\n",
+    "                name=\"video_summary\",\n",
+    "                type=\"string\",\n",
+    "                indexing=[\"index\", \"summary\"],\n",
+    "                match=[\"text\"],\n",
+    "                index=\"enable-bm25\",\n",
+    "            ),\n",
+    "            Field(\n",
+    "                name=\"embedding_scope\", type=\"string\", indexing=[\"attribute\", \"summary\"]\n",
+    "            ),\n",
+    "            # Segment offsets are fed as lists of floats, so the array needs an explicit element type\n",
+    "            Field(\n",
+    "                name=\"start_offset_sec\",\n",
+    "                type=\"array<float>\",\n",
+    "                indexing=[\"attribute\", \"summary\"],\n",
+    "            ),\n",
+    "            Field(\n",
+    "                name=\"end_offset_sec\",\n",
+    "                type=\"array<float>\",\n",
+    "                indexing=[\"attribute\", \"summary\"],\n",
+    "            ),\n",
+    "            # Mapped dimension p holds one entry per segment; x is the 1024-value embedding\n",
+    "            Field(\n",
+    "                name=\"embeddings\",\n",
+    "                type=\"tensor<float>(p{},x[1024])\",\n",
+    "                indexing=[\"index\", \"attribute\"],\n",
+    "                ann=HNSW(distance_metric=\"angular\"),\n",
+    "            ),\n",
+    "        ]\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "# The `default` fieldset groups the three text fields so one query term can match any of them.\n",
+    "# It has to be added to the schema explicitly; defining the list alone has no effect.\n",
+    "fieldsets = [\n",
+    "    FieldSet(\n",
+    "        name=\"default\",\n",
+    "        fields=[\"title\", \"keywords\", \"video_summary\"],\n",
+    "    ),\n",
+    "]\n",
+    "for fieldset in fieldsets:\n",
+    "    videos_schema.add_field_set(fieldset)\n",
+    "\n",
+    "mapfunctions = [\n",
+    "    # Dot product between the query vector and each stored segment vector (summed over x)\n",
+    "    Function(\n",
+    "        name=\"similarities\",\n",
+    "        expression=\"\"\"\n",
+    "            sum(\n",
+    "                query(q) * attribute(embeddings), x\n",
+    "            )\n",
+    "        \"\"\",\n",
+    "    ),\n",
+    "    # Lexical score: sum of BM25 over the three text fields\n",
+    "    Function(\n",
+    "        name=\"bm25_score\",\n",
+    "        expression=\"bm25(title) + bm25(keywords) + bm25(video_summary)\",\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "# Rank profile `hybrid`: BM25 in the first phase, then the top 10 hits are\n",
+    "# re-ranked by vector closeness in the second phase.\n",
+    "semantic_rankprofile = RankProfile(\n",
+    "    name=\"hybrid\",\n",
+    "    inputs=[(\"query(q)\", \"tensor<float>(x[1024])\")],\n",
+    "    first_phase=\"bm25_score\",\n",
+    "    second_phase=SecondPhaseRanking(\n",
+    "        expression=\"closeness(field, embeddings)\", rerank_count=10\n",
+    "    ),\n",
+    "    match_features=[\"closest(embeddings)\"],\n",
+    "    summary_features=[\"similarities\"],\n",
+    "    functions=mapfunctions,\n",
+    ")\n",
+    "\n",
+    "videos_schema.add_rank_profile(semantic_rankprofile)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "HsUrAY78k6Xr",
+ "metadata": {
+ "id": "HsUrAY78k6Xr"
+ },
+ "source": [
+ "We can now create the package based on the previous schema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "qvRl19JFBJGS",
+ "metadata": {
+ "id": "qvRl19JFBJGS"
+ },
+ "outputs": [],
+ "source": [
+ "# Create the Vespa application package\n",
+ "package = ApplicationPackage(name=application, schema=[videos_schema])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "_BJzOAOM_QFW",
+ "metadata": {
+ "id": "_BJzOAOM_QFW"
+ },
+ "source": [
+ "## 3.2 Deploy the Application Package\n",
+ "\n",
+ "The app is now defined and ready to deploy to Vespa Cloud.\n",
+ "\n",
+ "Deploy `package` to Vespa Cloud, by creating an instance of\n",
+ "[VespaCloud](https://pyvespa.readthedocs.io/en/latest/reference-api.html#vespa.deployment.VespaCloud):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "or8HJb5Q26h5",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "or8HJb5Q26h5",
+ "outputId": "c2e506cf-a4f9-4dc2-d13d-43bb7359583b"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Setting application...\n",
+ "Running: vespa config set application vespa-team.videosearch\n",
+ "Setting target cloud...\n",
+ "Running: vespa config set target cloud\n",
+ "\n",
+ "Api-key found for control plane access. Using api-key.\n",
+ "Certificate and key not found in /Users/thomas/Repos/pyvespa/docs/sphinx/source/examples/.vespa or /Users/thomas/.vespa/vespa-team.videosearch.default: Creating new cert/key pair with vespa CLI.\n",
+ "Generating certificate and key...\n",
+ "Running: vespa auth cert -N\n",
+ "Success: Certificate written to '/Users/thomas/.vespa/vespa-team.videosearch.default/data-plane-public-cert.pem'\n",
+ "Success: Private key written to '/Users/thomas/.vespa/vespa-team.videosearch.default/data-plane-private-key.pem'\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "vespa_cloud = VespaCloud(\n",
+ " tenant=tenant_name,\n",
+ " application=application,\n",
+ " application_package=package,\n",
+ " key_content=os.getenv(\"VESPA_TEAM_API_KEY\", None),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "nLeaLna86ApZ",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "nLeaLna86ApZ",
+ "outputId": "3bbabd17-0403-4fe6-f34f-8b4fb0220ddc"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Deployment started in run 1 of dev-aws-us-east-1c for vespa-team.videosearch. This may take a few minutes the first time.\n",
+ "INFO [13:08:31] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:08:32] Using CA signed certificate version 1\n",
+ "INFO [13:08:32] Using 1 nodes in container cluster 'videosearch_container'\n",
+ "INFO [13:08:34] Using 1 nodes in container cluster 'videosearch_container'\n",
+ "INFO [13:08:37] Session 336907 for tenant 'vespa-team' prepared, but activation failed: 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:08:47] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:08:47] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:08:55] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:08:56] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:09:06] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:09:06] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:09:16] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:09:16] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:09:25] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:09:25] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:09:36] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:09:36] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:09:47] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:09:47] 1/2 application hosts and 2/2 admin hosts for vespa-team.videosearch have completed provisioning and bootstrapping, still waiting for h113173.dev.us-east-1c.aws.vespa-cloud.net\n",
+ "INFO [13:10:02] Deploying platform version 8.475.11 and application dev build 1 for dev-aws-us-east-1c of default ...\n",
+ "INFO [13:10:03] Session 336907 for vespa-team.videosearch.default activated\n",
+ "INFO [13:10:24] ######## Details for all nodes ########\n",
+ "INFO [13:10:25] h113173a.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "INFO [13:10:25] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "INFO [13:10:25] --- storagenode on port 19102 has not started \n",
+ "INFO [13:10:25] --- searchnode on port 19107 has not started \n",
+ "INFO [13:10:25] --- distributor on port 19111 has not started \n",
+ "INFO [13:10:25] --- metricsproxy-container on port 19092 has not started \n",
+ "INFO [13:10:25] h113168a.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "INFO [13:10:25] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "INFO [13:10:25] --- container on port 4080 has not started \n",
+ "INFO [13:10:25] --- metricsproxy-container on port 19092 has not started \n",
+ "INFO [13:10:25] h112903j.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "INFO [13:10:25] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "INFO [13:10:25] --- container-clustercontroller on port 19050 has not started \n",
+ "INFO [13:10:25] --- metricsproxy-container on port 19092 has not started \n",
+ "INFO [13:10:25] h97462d.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "INFO [13:10:25] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "INFO [13:10:25] --- logserver-container on port 4080 has not started \n",
+ "INFO [13:10:25] --- metricsproxy-container on port 19092 has not started \n",
+ "INFO [13:11:36] Waiting for convergence of 10 services across 4 nodes\n",
+ "INFO [13:11:36] 1 nodes booting\n",
+ "INFO [13:11:36] 7 application services still deploying\n",
+ "DEBUG [13:11:36] h113168a.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "DEBUG [13:11:36] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "DEBUG [13:11:36] --- container on port 4080 has not started \n",
+ "DEBUG [13:11:36] --- metricsproxy-container on port 19092 has not started \n",
+ "DEBUG [13:11:36] h113173a.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "DEBUG [13:11:36] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "DEBUG [13:11:36] --- storagenode on port 19102 has config generation 336907, wanted is 336907\n",
+ "DEBUG [13:11:36] --- searchnode on port 19107 has config generation 336907, wanted is 336907\n",
+ "DEBUG [13:11:36] --- distributor on port 19111 has config generation 336907, wanted is 336907\n",
+ "DEBUG [13:11:36] --- metricsproxy-container on port 19092 has not started \n",
+ "DEBUG [13:11:36] h112903j.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "DEBUG [13:11:36] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "DEBUG [13:11:36] --- container-clustercontroller on port 19050 has not started \n",
+ "DEBUG [13:11:36] --- metricsproxy-container on port 19092 has not started \n",
+ "DEBUG [13:11:36] h97462d.dev.us-east-1c.aws.vespa-cloud.net: expected to be UP\n",
+ "DEBUG [13:11:36] --- platform vespa/cloud-tenant-rhel8:8.475.11\n",
+ "DEBUG [13:11:36] --- logserver-container on port 4080 has not started \n",
+ "DEBUG [13:11:36] --- metricsproxy-container on port 19092 has not started \n",
+ "INFO [13:12:26] Found endpoints:\n",
+ "INFO [13:12:26] - dev.aws-us-east-1c\n",
+ "INFO [13:12:26] |-- https://a583302e.cd0ed489.z.vespa-app.cloud/ (cluster 'videosearch_container')\n",
+ "INFO [13:12:26] Deployment complete!\n",
+ "Only region: aws-us-east-1c available in dev environment.\n",
+ "Found mtls endpoint for videosearch_container\n",
+ "URL: https://a583302e.cd0ed489.z.vespa-app.cloud/\n",
+ "Application is up!\n"
+ ]
+ }
+ ],
+ "source": [
+ "app = vespa_cloud.deploy()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "v-58uKvQ_78K",
+ "metadata": {
+ "id": "v-58uKvQ_78K"
+ },
+ "source": [
+ "## 3.3 Feed the Vespa Application\n",
+ "\n",
+ "The `vespa_feed` feed format for `pyvespa` expects a dict with the keys `id` and `fields`:\n",
+ "\n",
+ "`{ \"id\": \"vespa-document-id\", \"fields\": {\"vespa_field\": \"vespa-field-value\"}}`\n",
+ "\n",
+ "For the id, we will use a md5 hash of the video url.\n",
+ "\n",
+ "The video embedding output segments are added to the `fields` in `vespa_feed`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "dfpazeii8Abp",
+ "metadata": {
+ "id": "dfpazeii8Abp"
+ },
+ "outputs": [],
+ "source": [
+    "# Initialize a list to store Vespa feed documents\n",
+    "vespa_feed = []\n",
+    "\n",
+    "# Keywords/summaries were generated in the reverse order of VIDEO_URLs, so the\n",
+    "# URLs are walked in reverse. `tasks` was built in VIDEO_URLs order (one task\n",
+    "# per URL, in sequence), so it is reversed as well; otherwise a video's\n",
+    "# embeddings would be stored under another video's URL and metadata.\n",
+    "# Reversed copies are used instead of VIDEO_URLs.reverse() so that re-running\n",
+    "# this cell does not flip the order back.\n",
+    "urls_reversed = list(reversed(VIDEO_URLs))\n",
+    "tasks_reversed = list(reversed(tasks))\n",
+    "\n",
+    "# Build one Vespa document per video\n",
+    "for i, task in enumerate(tasks_reversed):\n",
+    "    video_url = urls_reversed[i]\n",
+    "    title = titles[i]\n",
+    "    keywords = keywords_array[i]\n",
+    "    summary = summaries[i]\n",
+    "\n",
+    "    segments = task.video_embedding.segments\n",
+    "    if not segments:\n",
+    "        # Without segments there is no embedding to store for this video\n",
+    "        print(f\"Skipping {video_url}: no embedding segments\")\n",
+    "        continue\n",
+    "\n",
+    "    # Parallel lists: position k holds the offsets of segment k\n",
+    "    start_offsets = [float(segment.start_offset_sec) for segment in segments]\n",
+    "    end_offsets = [float(segment.end_offset_sec) for segment in segments]\n",
+    "\n",
+    "    # Segment position (as a string) -> embedding vector, matching the mapped\n",
+    "    # dimension of the `embeddings` tensor field\n",
+    "    embeddings = {\n",
+    "        str(index): list(map(float, segment.embeddings_float))\n",
+    "        for index, segment in enumerate(segments)\n",
+    "    }\n",
+    "\n",
+    "    # md5 of the video URL: a stable ID that does not depend on the number of segments\n",
+    "    id_hash = hashlib.md5(video_url.encode()).hexdigest()\n",
+    "\n",
+    "    document = {\n",
+    "        \"id\": id_hash,\n",
+    "        \"fields\": {\n",
+    "            \"video_url\": video_url,\n",
+    "            \"title\": title,\n",
+    "            \"keywords\": keywords,\n",
+    "            \"video_summary\": summary,\n",
+    "            # A single scope value is stored per video; it is read from the last segment\n",
+    "            \"embedding_scope\": segments[-1].embedding_scope,\n",
+    "            \"start_offset_sec\": start_offsets,\n",
+    "            \"end_offset_sec\": end_offsets,\n",
+    "            \"embeddings\": embeddings,\n",
+    "        },\n",
+    "    }\n",
+    "    vespa_feed.append(document)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "G7KUVns0BSzZ",
+ "metadata": {
+ "id": "G7KUVns0BSzZ"
+ },
+ "source": [
+    "We can quickly validate the number of documents created (one for each video), and visually check the first record."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "aZDNkQEXMU15",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "aZDNkQEXMU15",
+ "outputId": "ec476d4e-bc9e-4559-d37f-c37816d21da5"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total documents created: 3\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Print Vespa feed size and an example\n",
+ "print(f\"Total documents created: {len(vespa_feed)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "TF2GsukrA9Xw",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "TF2GsukrA9Xw",
+ "outputId": "84443f3d-d929-490b-e150-d03d00010f8f"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"id\": \"0b1fc68a17391fb58102a539ed290d27\",\n",
+ " \"fields\": {\n",
+ " \"video_url\": \"https://ia801503.us.archive.org/27/items/hide-and-seek-with-giant-jenny/HnVideoEditor_2022_10_29_205557707.ia.mp4\",\n",
+ " \"title\": \"Hide and Seek with Giant Jenny\",\n",
+ " \"keywords\": \"Wolf, Cardboard, Alien, Hide-and-seek, GiantFox\",\n",
+ " \"video_summary\": \"The video opens with a whimsical scene where a panda holding a green apple emerges from behind a yellow triangle, followed by the appearance of various shapes and three cartoon wolves, each with distinct characteristics: one wearing glasses and a yellow dress, another with a pink bow and a diaper, and the last one gray with a red shirt. This sets the tone for a playful and imaginative narrative.\\nThe story then transitions to a solid cyan backdrop where two cartoon dogs dressed as waiters are startled by the sudden descent of a giant fox, causing them to drop their items and fall backward. This segment introduces a sense of surprise and chaos, which is a recurring theme throughout the video.\\nInside a room with purple walls adorned with flower patterns, a television set displays animated content featuring anthropomorphic animals. Three cartoon wolves are watching TV when a small alien character enters the scene, causing the wolves to react with surprise and fear. This moment adds an element of mystery and introduces the alien as a significant character.\\nThe narrative then shifts to a street view on the TV where a robot-like figure chases after cars. After this segment, the camera zooms out to show all four characters sitting together again. The perspective then changes to reveal a cardboard box containing craft supplies, and two hands begin assembling a miniature model of a colorful town. This town is made entirely of painted cardboard boxes, complete with toy vehicles like buses and cars moving around within its boundaries. The creation of this town is a highlight of the video, showcasing creativity and imagination.\\nThe three cartoon wolves express excitement over the completed town and engage in hide-and-seek games among themselves, utilizing different parts of the constructed townscape. 
This segment is filled with playful interactions and the joy of discovery, as the wolves successfully hide and are eventually found.\\nThe video then introduces a pink cartoon dog wearing a diaper, crawling across a road between colorful buildings. A small brown wolf pup looks out the window, expressing concern. The scene shifts back to the street where the large dog is now holding the smaller wolf pup, looking around worriedly. This moment introduces a subplot of care and concern between the characters.\\nIn a miniature cityscape setting, the larger dog chases after toy cars driven by other toys, including the smaller wolf pup. Two aliens peek through a red building's arched windows, observing the situation below. The smaller wolf pup stands alone, appearing thoughtful before noticing its bottle falling from above and catching it just in time. This sequence of events adds layers of complexity to the narrative, blending care, playfulness, and a touch of danger.\\nThe larger dog runs towards the camera, passing behind two toy police cars parked near the curb. The smaller wolf pup watches as the larger dog approaches her again, and the larger dog continues running down the street, seemingly searching for someone. Inside the playroom, the larger dog sits amidst scattered blocks, watching over the alien and another gray wolf pup who appear distressed. The green alien drives away in a red convertible car, leaving the gray wolf pup behind. The larger dog pursues the fleeing vehicle, which has picked up the gray wolf pup inside, expressing surprise upon seeing the driver change seats with the passenger during the journey. The car passes a rainbow-colored bus stop sign, indicating that the chase leads beyond the initial town area.\\nThe video concludes with the characters standing together under a bridge, relieved to be reunited. 
They express gratitude to each other for helping one another throughout the day, bringing a sense of closure and camaraderie to the narrative.\\nOverall, the video is a delightful blend of imagination, creativity, and playful interactions, centered around the characters' adventures in a colorful and whimsical world.\",\n",
+ " \"embedding_scope\": \"clip\",\n",
+ " \"start_offset_sec\": [\n",
+ " 0.0,\n",
+ " 6.0,\n",
+ " 12.0\n",
+ " ],\n",
+ " \"end_offset_sec\": [\n",
+ " 6.0,\n",
+ " 12.0,\n",
+ " 18.0\n",
+ " ],\n",
+ " \"embedding\": {\n",
+ " \"0\": [\n",
+ " 0.04080625,\n",
+ " 0.0086980555,\n",
+ " 0.00096186635\n",
+ " ],\n",
+ " \"1\": [\n",
+ " 0.05161131,\n",
+ " -0.0063618324,\n",
+ " -0.008135624\n",
+ " ],\n",
+ " \"2\": [\n",
+ " 0.050463274,\n",
+ " 0.0006376326,\n",
+ " -0.010785032\n",
+ " ]\n",
+ " }\n",
+ " }\n",
+ "}\n"
+ ]
+ }
+ ],
+ "source": [
+    "# The positional index of the document to preview\n",
+    "i = 0\n",
+    "\n",
+    "doc_fields = vespa_feed[i][\"fields\"]\n",
+    "\n",
+    "# Keep the printout short: first 3 segments only, and the first 3 values of each vector\n",
+    "embedding_sample = {\n",
+    "    key: values[:3] for key, values in list(doc_fields[\"embeddings\"].items())[:3]\n",
+    "}\n",
+    "\n",
+    "# Same fields, in the same order, as the feed document, with the long lists truncated\n",
+    "pretty_json = json.dumps(\n",
+    "    {\n",
+    "        \"id\": vespa_feed[i][\"id\"],\n",
+    "        \"fields\": {\n",
+    "            **doc_fields,\n",
+    "            \"start_offset_sec\": doc_fields[\"start_offset_sec\"][:3],\n",
+    "            \"end_offset_sec\": doc_fields[\"end_offset_sec\"][:3],\n",
+    "            \"embeddings\": embedding_sample,\n",
+    "        },\n",
+    "    },\n",
+    "    indent=4,\n",
+    ")\n",
+    "\n",
+    "print(pretty_json)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "JPgWYO_zDfBY",
+ "metadata": {
+ "id": "JPgWYO_zDfBY"
+ },
+ "source": [
+ "Now we can feed to Vespa using `feed_iterable` which accepts any `Iterable` and an optional callback function where we can\n",
+ "check the outcome of each operation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d3d650aa",
+ "metadata": {
+ "id": "d3d650aa"
+ },
+ "outputs": [],
+ "source": [
+    "def callback(response: VespaResponse, id: str):\n",
+    "    \"\"\"Print a message for a feed operation that was not successful; do nothing otherwise.\"\"\"\n",
+    "    if response.is_successful():\n",
+    "        return\n",
+    "    print(\n",
+    "        f\"Failed to feed document {id} with status code {response.status_code}: Reason {response.get_json()}\"\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "# Feed every document of vespa_feed into the `videos` schema; failures are reported by `callback`\n",
+    "app.feed_iterable(vespa_feed, schema=\"videos\", callback=callback)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1TmrMnhEQx2",
+ "metadata": {
+ "id": "e1TmrMnhEQx2"
+ },
+ "source": [
+ "# 4. Performing search on the videos\n",
+ "\n",
+ "\n",
+ "## 4.1 Performing a hybrid search on the video\n",
+ "\n",
+ "As an example query, we will retrieve all the chunks which show Santa Claus on his sleigh. The first step is to generate a text embedding for `Santa Claus on his sleigh` using the `Marengo-retrieval-2.7` model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8wxSLkGkpL8w",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8wxSLkGkpL8w",
+ "outputId": "ebc3da4e-ce15-4fae-f517-811c1110c4b8"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created a text embedding\n",
+ " Model: Marengo-retrieval-2.7\n",
+ " Embedding Dimension: 1024\n",
+ "[-0.0043029785, -0.0034332275, 0.03857422, -0.07080078, -0.044677734, 0.0005226135, -0.008178711, -0.0019989014, 0.01953125, 0.044433594, -0.0008735657, 0.028076172, -0.03930664, 0.024780273, -0.03466797, 0.015380859, 0.020751953, 0.010070801, 0.03881836, -0.018798828, 0.04638672, 0.048339844, 0.028564453, -0.05419922, 0.008972168, 0.003692627, -0.041259766, 3.2901764e-05, -0.016113281, -0.016845703, 0.033203125, -0.014038086, -0.018432617, -0.011047363, -0.04711914, 0.010498047, 0.019897461, 0.040771484, -0.012939453, 0.0072021484, -0.036376953, -0.0011901855, -0.026611328, -0.029907227, -0.0011749268, 0.01928711, 0.030029297, 0.020629883, 0.020874023, -0.01965332, 0.03173828, -0.076171875, -0.022216797, 0.03540039, -0.017333984, 0.013000488, -0.020507812, -0.002319336, 0.016845703, 0.033203125, 0.0625, 0.01171875, 0.005218506, 0.041503906, 0.013916016, 0.046875, 0.028686523, 0.042236328, -0.025512695, 0.06225586, 0.0008621216, 0.053466797, -0.014038086, 0.057861328, 0.01586914, 0.0058288574, -0.056152344, -0.020629883, 0.03173828, 0.034423828, 0.029785156, 0.030639648, -0.016235352, 0.03930664, 0.0063171387, -0.016235352, 0.038085938, -0.018554688, 0.0014419556, 0.014892578, -0.010559082, 0.02709961, -0.0013885498, -0.008666992, -0.012451172, -0.0005226135, -0.003692627, 0.048095703, -0.0047302246, 0.021606445, 0.048339844, 0.001335144, -0.014587402, -0.050048828, -0.033935547, -0.060546875, -0.013305664, -0.04711914, -0.0234375, -0.034423828, 0.0126953125, -0.002105713, -0.05859375, -0.021362305, -0.00065231323, -0.05517578, 0.022949219, 0.018432617, 0.04272461, -0.038330078, -0.00592041, 0.01361084, 0.016723633, 0.041015625, -0.041259766, -0.03955078, 0.018798828, 0.038085938, -0.014099121, 0.04638672, 0.01977539, 0.0027770996, 0.043945312, 0.041503906, 0.024780273, 0.023925781, -0.008117676, -0.048583984, -0.018676758, -0.052490234, -0.01940918, -0.025024414, 0.027832031, 0.017822266, 0.020385742, -0.03540039, -0.016723633, -0.020385742, -0.022705078, 
-0.052246094, -0.032958984, -0.036621094, 0.04711914, -0.014343262, -0.0055236816, -0.012634277, 0.028198242, -0.025268555, -0.026123047, -0.008239746, 0.005584717, 0.0146484375, -0.076660156, 0.0024414062, -0.011108398, 0.025512695, 0.0004272461, 0.035888672, 0.04711914, 0.030151367, 0.0036010742, 0.0033721924, 0.01977539, 0.03564453, -0.022460938, 0.008666992, -0.0033569336, -0.06591797, 0.032958984, -0.002746582, -0.051513672, -0.013977051, -0.030029297, 0.013916016, -0.024536133, 0.010375977, 0.015625, 0.024658203, -0.007019043, 0.0020599365, 0.080566406, -0.050048828, 0.032714844, 0.050048828, -0.06298828, 0.020874023, 0.01574707, -0.07421875, -0.040527344, -0.041748047, 0.036132812, -0.020507812, -0.004272461, 0.035888672, 0.024414062, -0.0024871826, -0.007293701, 0.018066406, -0.052001953, -0.03564453, -0.049072266, -0.022094727, -0.0119018555, 0.019897461, -0.007873535, -0.00970459, -0.022460938, 0.042236328, -0.025756836, 0.018798828, 0.037353516, -0.017211914, -0.005065918, 0.0058898926, -0.004425049, -0.011962891, 0.008361816, 0.009765625, -0.026000977, -0.021728516, -0.00048828125, 0.03881836, -0.047607422, 0.032226562, 0.048339844, -0.0030822754, -0.021240234, 0.021484375, -0.029663086, -0.0028381348, 0.013366699, 0.07080078, -0.003616333, -0.029663086, -0.009399414, 0.10253906, -0.020996094, -0.008361816, -0.05053711, -0.020874023, 0.03857422, 0.03466797, 0.024658203, 0.010498047, 0.026611328, -0.039794922, 0.003829956, -0.02355957, -0.036376953, 0.048583984, 0.060546875, -0.026733398, -0.013183594, -0.04345703, -0.028930664, 0.040283203, 0.01361084, -0.013549805, -0.008605957, 0.01586914, 0.030029297, 0.004119873, -0.020874023, 0.022338867, -0.084472656, -0.0027770996, 0.055419922, -0.010925293, -0.010986328, -0.056396484, 0.044189453, 0.032958984, 0.020263672, 0.02758789, 0.025390625, -0.018554688, 0.009887695, 0.061523438, 0.05078125, -0.012329102, 0.012634277, 0.027832031, -0.03125, 0.0062561035, -0.010375977, -0.021362305, 0.013549805, 
-0.015075684, -0.028198242, -0.01977539, -0.0023651123, 0.0074768066, -0.048828125, 0.049072266, 0.06542969, -0.0035858154, -0.057617188, 0.021240234, 0.01574707, -0.0016021729, -0.032714844, -0.015136719, -0.02746582, 0.017578125, -0.020263672, -0.060791016, 0.012512207, -0.030517578, -0.045654297, 0.04321289, -0.037353516, -0.03857422, -0.011291504, -0.010437012, 0.017089844, 0.026245117, -0.0033874512, -0.026245117, -0.02758789, 0.029785156, 0.021362305, -0.038330078, -0.013427734, 0.0012130737, 0.030761719, -0.03149414, -0.030639648, 0.01361084, 0.009521484, 0.0035552979, -0.021972656, 0.023803711, 0.04321289, 0.021606445, 0.037841797, 0.012451172, -0.018066406, 0.00390625, 0.013366699, 0.037597656, 0.019042969, -0.02355957, -0.01171875, -0.022705078, -0.026245117, 0.05810547, 0.033203125, 0.048828125, -0.041992188, -0.03515625, 0.025268555, -0.033447266, 0.020996094, -0.020019531, 0.008605957, 0.014038086, -0.0034332275, 0.0012893677, -0.010864258, 0.00021743774, 0.023925781, -0.026000977, -0.007385254, 0.032226562, 0.083984375, -0.052001953, -0.026123047, 0.03125, -0.03149414, 0.032958984, 0.04296875, -0.022583008, 0.00075912476, 0.009765625, -0.020629883, -0.039794922, 0.025024414, 0.025512695, -0.018310547, -0.018432617, 0.02331543, -0.017578125, 0.052001953, -0.04248047, 0.010192871, 0.0077209473, -0.03515625, 0.051513672, -0.03515625, 0.00037384033, -0.03930664, -0.011779785, 0.025756836, 0.021240234, -0.0068969727, -0.0076293945, -0.016113281, -0.032470703, 0.05908203, -0.025146484, 0.0073547363, 0.023803711, 0.008544922, 0.00592041, 0.034423828, -0.084472656, 0.05102539, -0.010070801, 0.03564453, -0.012756348, -0.013671875, -0.012817383, -0.024902344, -0.012756348, -0.018188477, 0.0002822876, -0.022460938, 0.00970459, -0.032226562, -0.004119873, 0.001876831, -0.037353516, -0.043701172, 0.021850586, -0.024291992, 0.01953125, 0.026489258, -0.005065918, -0.036621094, 0.045898438, 0.017089844, 0.00022411346, -0.033691406, -0.0079956055, 0.044677734, 
-0.03930664, -0.02734375, -0.052490234, -0.045166016, -0.013122559, -0.03173828, -0.004852295, -0.05493164, 0.01574707, 0.011291504, 0.0019989014, -0.05126953, -0.040527344, 0.041259766, -0.028930664, 0.028198242, -0.029174805, -0.0068359375, 0.0013122559, -0.010864258, -0.038085938, 0.023803711, -0.017822266, 0.010986328, -0.045654297, 0.028686523, 0.013916016, 0.029174805, -0.0064086914, 0.020629883, 0.013671875, -0.017333984, 0.083984375, -0.03125, -0.017700195, -0.032226562, 0.0030517578, 0.043701172, 0.012329102, 0.021240234, -0.036621094, 0.029418945, 0.0018920898, 0.033691406, -0.0146484375, 0.020629883, -0.030639648, 0.064941406, 0.020019531, 0.0154418945, -0.047851562, 0.022583008, 0.03955078, -0.032470703, 0.02368164, 0.071777344, -0.030517578, -0.027954102, 0.037841797, -0.049804688, -0.0064086914, -0.0027923584, 0.0040893555, -0.001121521, 0.010559082, -0.021484375, -0.022583008, -0.064453125, 0.00592041, -0.037109375, 0.010681152, -0.037841797, 0.071777344, 0.026489258, 0.012023926, -0.021850586, -0.018920898, -0.03149414, 0.016601562, -0.004699707, -0.040283203, 0.053222656, 0.014038086, 0.02368164, -0.0077209473, -0.008728027, 0.0069274902, -0.028808594, 0.040283203, 0.014953613, 0.020263672, -0.029785156, -0.011047363, -0.007751465, -0.044189453, -0.021972656, 0.0010681152, 0.037353516, 0.00045013428, -0.0016326904, -0.02722168, -0.05493164, 0.0119018555, 0.013793945, 0.04272461, 0.020141602, 0.014465332, -0.00793457, -0.072753906, -0.025634766, 0.020263672, 0.0027160645, -0.024047852, 0.055419922, 0.0054626465, 0.03100586, -0.013793945, -0.012756348, 0.028808594, 0.014526367, 0.013916016, -0.0036468506, -0.008117676, -0.032470703, 0.022094727, 0.0020751953, -0.0014877319, -0.021240234, 0.009765625, 0.016723633, -0.016357422, 0.041015625, -0.004852295, -0.02355957, -0.005340576, -0.021606445, -0.023803711, -0.03881836, 0.0018234253, 0.0009994507, -0.02758789, -0.0051574707, -0.0013275146, -0.015197754, 0.026245117, -0.013183594, -0.019165039, 
0.018554688, -0.027954102, -0.03955078, -0.029785156, 0.012756348, -0.009765625, -0.013977051, 0.032226562, 0.012023926, -0.016479492, 0.010437012, -0.05126953, -0.0234375, -0.034423828, 0.07128906, 0.0070495605, -0.03564453, 0.036132812, -0.0076904297, -0.012878418, -0.0014572144, -0.0234375, -0.004486084, 0.010559082, 0.004852295, 0.030029297, 0.027954102, 0.027832031, -0.007385254, 0.03491211, -0.016967773, -0.025634766, -0.010253906, 0.052490234, -0.024536133, -0.048583984, 0.008056641, 0.053710938, -0.053710938, 0.020141602, 0.0007209778, -0.07861328, 0.009216309, 0.027954102, -0.013793945, -0.025024414, -0.0068969727, -0.013000488, -0.002380371, -0.008300781, 0.029663086, -0.05102539, -0.013427734, -0.030395508, 0.036865234, -0.022338867, -0.010925293, 0.0036468506, 0.0058288574, 0.0059814453, -0.036132812, -0.022827148, 0.009155273, 0.049560547, -0.018310547, 0.047607422, 0.051513672, 0.024658203, 0.041503906, -0.012145996, 0.037109375, -0.041992188, 0.059570312, -0.006652832, 0.0015106201, 0.009277344, 0.016235352, -0.021972656, -0.007751465, 0.008605957, 0.004638672, -0.009094238, -0.041015625, 0.004058838, 0.015991211, -0.049316406, -0.010559082, -0.040527344, -0.01928711, -0.07763672, 0.022949219, 0.0043029785, 0.032226562, 0.0018615723, -0.048095703, -0.026123047, 0.03491211, -0.014038086, 0.0054626465, -0.01373291, 0.036621094, 0.014099121, 0.052734375, -0.0056152344, -0.036621094, 0.07421875, -0.008239746, -0.029174805, 0.026611328, 0.03955078, -0.020629883, -0.048095703, -0.012756348, 0.037109375, 0.004058838, 0.003479004, 0.036865234, -0.003326416, -0.014587402, 0.012390137, 0.012023926, -0.036621094, 0.021240234, 0.021118164, 0.015380859, 0.017822266, 0.022949219, -0.032714844, 0.0051574707, 0.013977051, 0.009033203, -0.025756836, 0.011169434, -0.007446289, 0.009765625, -0.009460449, 0.036132812, 0.0390625, 0.034179688, -0.03491211, -0.030029297, -0.011352539, -0.0073547363, -0.007659912, -0.037109375, -0.016235352, 0.03564453, 0.012878418, 
0.036865234, -0.014343262, -0.06347656, 0.0067443848, 0.063964844, 0.048583984, 0.008850098, 0.008850098, -0.01586914, -0.012939453, -0.045410156, 0.00012302399, -0.014343262, 0.056396484, 0.03564453, -0.008117676, -0.01574707, 0.0058288574, -0.035888672, -0.012451172, -0.055908203, 0.026367188, -0.08496094, -0.00049972534, 0.04248047, -0.048339844, 0.049804688, 0.013305664, 0.052490234, 0.041992188, 0.028564453, 0.017822266, 0.029907227, 0.03930664, -0.068847656, -0.017211914, 0.028442383, -0.008728027, 0.018676758, 0.006652832, -0.060791016, 0.0154418945, 0.048095703, -0.024902344, 0.037109375, 0.016845703, 0.0062561035, 0.009521484, 0.023071289, -0.032714844, -0.00982666, 0.034423828, -0.006439209, 0.029418945, -0.013977051, -0.015563965, 0.040283203, -0.008544922, 0.024169922, -0.025268555, -0.0008125305, -0.015563965, -0.040771484, 0.009277344, -0.012390137, 0.044189453, 0.022094727, 0.008422852, -0.010803223, -0.0025024414, 0.036621094, -0.02722168, 0.035888672, 0.022216797, 0.037597656, 0.008422852, -0.029663086, 0.0046081543, -0.03466797, -0.05493164, -0.057861328, 0.0053100586, 0.013122559, 0.06689453, 0.026000977, 0.032226562, -0.010070801, -0.011169434, 0.083984375, -0.0025177002, 0.052490234, -0.028076172, 0.0046081543, 0.01940918, -0.008117676, 0.018554688, 0.017700195, -0.022216797, -0.052246094, -0.0028686523, -0.027832031, 0.028808594, 0.0033111572, 0.0047302246, 0.053466797, -0.06689453, 0.041015625, 0.053955078, -0.013427734, 0.008239746, -0.018310547, -0.051757812, 0.044677734, -0.024780273, -0.0042419434, 0.029541016, 0.017089844, 0.025268555, 0.004211426, 0.00982666, -0.008483887, 0.018920898, 0.044189453, -0.043945312, -0.0019226074, -0.022583008, 0.0032806396, 0.061523438, -0.003967285, -0.06542969, 0.017700195, -0.034179688, -0.018554688, 0.012084961, -0.048828125, -0.05883789, -0.03955078, -0.014404297, -0.034179688, -0.011962891, 0.0057373047, 0.018554688, 0.035888672, 0.034179688, -0.03930664, 0.016113281, -0.032958984, 0.04345703, 
0.041503906, 0.003692627, 0.02331543, 0.0036010742, 0.035888672, -0.0390625, 0.036621094, 0.008056641, 0.084472656, 0.030395508, 0.022583008, 0.033691406, 0.07324219, -0.014099121, -0.0703125, -0.0024261475, 0.0234375, -0.013916016, -0.067871094, -0.0039367676, 0.011413574, 0.05883789, -0.00579834, -0.041259766, 0.035888672, 0.056152344, -0.007293701, 0.025512695, -0.01586914, 0.012268066, -0.0037841797, 0.00793457, -0.0015182495, 0.026611328, -0.028808594, 0.0008506775, 0.034423828, -0.012817383, 0.020996094, -0.037597656, -0.020874023, 0.027709961, -0.028320312, 0.012145996, -0.0005912781, -0.028564453, -0.01574707, 0.04321289, 0.014770508, -0.041503906, 0.047851562, 0.018188477, 0.015563965, 0.04345703, -0.020507812, 0.002746582, 0.06542969, -0.00075912476, 0.021240234, -0.04296875, -0.018310547, 0.014343262, -0.006439209, 0.026855469, 0.03112793, 0.0025024414, -0.009155273, 0.0040283203, 0.013793945, 0.048339844, -0.083984375, 0.00038337708, 0.06542969, 0.0390625, 0.05444336, 0.050048828, 0.033203125, -0.022705078, 0.024536133, -0.024414062, -0.025268555, -0.011779785, -0.053466797, 0.0026550293, 0.003479004, -0.008483887, -0.023071289, 0.028686523, -0.001449585, -0.03955078, 0.019897461, 0.030273438, -0.02331543, -0.05078125, 0.03491211, -0.028808594, 0.043701172, -0.046142578, 0.020996094, -0.005584717, 0.056884766, 0.015136719, -0.005584717, 0.025268555, -0.05908203, -0.061767578, 0.04248047, -0.007446289, 0.038085938, -0.047851562, -0.006500244, 0.040771484, -0.052001953, -0.02709961, 0.032470703, -0.021728516, 0.0010757446, 0.020385742, -0.017456055, -0.028442383, -0.063964844, -0.07373047, -0.052246094, 0.015563965, 0.044189453, 0.008850098, 0.014160156, 0.048828125, -0.017211914, -0.020019531, 0.05419922]\n"
+ ]
+ }
+ ],
+ "source": [
+ "client = TwelveLabs(api_key=TL_API_KEY)\n",
+ "user_query = \"Santa Claus on his sleigh\"\n",
+ "\n",
+ "# Embed the query text with the same model name used throughout this notebook\n",
+ "res = client.embed.create(\n",
+ "    model_name=\"Marengo-retrieval-2.7\",\n",
+ "    text=user_query,\n",
+ ")\n",
+ "\n",
+ "print(\"Created a text embedding\")\n",
+ "print(f\" Model: {res.model_name}\")\n",
+ "\n",
+ "# Fail here with a clear message: otherwise q_embedding would stay undefined\n",
+ "# (or keep a stale value from an earlier run) and the search cells would break later.\n",
+ "if res.text_embedding is None or not res.text_embedding.segments:\n",
+ "    raise RuntimeError(f\"No text embedding was returned for query {user_query!r}\")\n",
+ "\n",
+ "q_embedding = res.text_embedding.segments[0].embeddings_float\n",
+ "print(f\" Embedding Dimension: {len(q_embedding)}\")\n",
+ "# Preview only the first 5 values instead of dumping the whole vector\n",
+ "print(q_embedding[:5])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "oOR1PuSRSlh4",
+ "metadata": {
+ "id": "oOR1PuSRSlh4"
+ },
+ "source": [
+ "The following query uses the dense vector representation of the query obtained previously and matches it against the document embeddings; matching is accelerated by Vespa's support for\n",
+ "[approximate nearest neighbor search](https://docs.vespa.ai/en/approximate-nn-hnsw.html).\n",
+ "\n",
+ "The output is limited to the top 1 hit, as we only have a sample of 3 videos. The top hit is ranked by a hybrid of a BM25 score from a lexical search on the text, keywords and summary of the video, performed as the first phase, and a similarity search on the embeddings.\n",
+ "\n",
+ "As part of the `match-features`, we can see that segment 212 of the video was the one providing the closest match.\n",
+ "\n",
+ "We also calculate the similarities as part of the `summary-features` for the rest of the segments so we can look for top N segments within a video, optionally.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "N4XEyB4pYC7l",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "N4XEyB4pYC7l",
+ "outputId": "b729544a-373d-492e-e181-ebbb736bb803"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{\n",
+ " \"id\": \"id:videos:videos::13bcb994b389c9d925993e611877e40b\",\n",
+ " \"relevance\": 0.47162757625475055,\n",
+ " \"source\": \"videosearch_content\",\n",
+ " \"fields\": {\n",
+ " \"matchfeatures\": {\n",
+ " \"closest(embeddings)\": {\n",
+ " \"type\": \"tensor(p{})\",\n",
+ " \"cells\": {\n",
+ " \"212\": 1.0\n",
+ " }\n",
+ " }\n",
+ " },\n",
+ " \"sddocname\": \"videos\",\n",
+ " \"documentid\": \"id:videos:videos::13bcb994b389c9d925993e611877e40b\",\n",
+ " \"video_url\": \"https://ia601401.us.archive.org/1/items/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net.mp4\",\n",
+ " \"title\": \"Twas the night before Christmas\",\n",
+ " \"keywords\": \"Christmas, Santa Claus, Animated Special, Holiday Adventure, Teamwork\",\n",
+ " \"video_summary\": \"In Junctionville, Santa cancels Christmas after receiving an anonymous letter claiming he doesn\\u2019t exist. Joshua Trundle and Father Mouse create a special clock tower to win Santa back. Albert Mouse accidentally breaks the clock mechanism, causing public embarrassment. Feeling remorseful, Albert repairs the clock just before Christmas Eve. The clock\\u2019s magical song convinces Santa to return and restore Christmas.\",\n",
+ " \"embedding_scope\": \"clip\",\n",
+ " \"start_offset_sec\": [\n",
+ " 0.0,\n",
+ " 6.0,\n",
+ " 12.0,\n",
+ " 18.0,\n",
+ " 24.0,\n",
+ " 30.0,\n",
+ " 36.0,\n",
+ " 42.0,\n",
+ " 48.0,\n",
+ " 54.0,\n",
+ " 60.0,\n",
+ " 66.0,\n",
+ " 72.0,\n",
+ " 78.0,\n",
+ " 84.0,\n",
+ " 90.0,\n",
+ " 96.0,\n",
+ " 102.0,\n",
+ " 108.0,\n",
+ " 114.0,\n",
+ " 120.0,\n",
+ " 126.0,\n",
+ " 132.0,\n",
+ " 138.0,\n",
+ " 144.0,\n",
+ " 150.0,\n",
+ " 156.0,\n",
+ " 162.0,\n",
+ " 168.0,\n",
+ " 174.0,\n",
+ " 180.0,\n",
+ " 186.0,\n",
+ " 192.0,\n",
+ " 198.0,\n",
+ " 204.0,\n",
+ " 210.0,\n",
+ " 216.0,\n",
+ " 222.0,\n",
+ " 228.0,\n",
+ " 234.0,\n",
+ " 240.0,\n",
+ " 246.0,\n",
+ " 252.0,\n",
+ " 258.0,\n",
+ " 264.0,\n",
+ " 270.0,\n",
+ " 276.0,\n",
+ " 282.0,\n",
+ " 288.0,\n",
+ " 294.0,\n",
+ " 300.0,\n",
+ " 306.0,\n",
+ " 312.0,\n",
+ " 318.0,\n",
+ " 324.0,\n",
+ " 330.0,\n",
+ " 336.0,\n",
+ " 342.0,\n",
+ " 348.0,\n",
+ " 354.0,\n",
+ " 360.0,\n",
+ " 366.0,\n",
+ " 372.0,\n",
+ " 378.0,\n",
+ " 384.0,\n",
+ " 390.0,\n",
+ " 396.0,\n",
+ " 402.0,\n",
+ " 408.0,\n",
+ " 414.0,\n",
+ " 420.0,\n",
+ " 426.0,\n",
+ " 432.0,\n",
+ " 438.0,\n",
+ " 444.0,\n",
+ " 450.0,\n",
+ " 456.0,\n",
+ " 462.0,\n",
+ " 468.0,\n",
+ " 474.0,\n",
+ " 480.0,\n",
+ " 486.0,\n",
+ " 492.0,\n",
+ " 498.0,\n",
+ " 504.0,\n",
+ " 510.0,\n",
+ " 516.0,\n",
+ " 522.0,\n",
+ " 528.0,\n",
+ " 534.0,\n",
+ " 540.0,\n",
+ " 546.0,\n",
+ " 552.0,\n",
+ " 558.0,\n",
+ " 564.0,\n",
+ " 570.0,\n",
+ " 576.0,\n",
+ " 582.0,\n",
+ " 588.0,\n",
+ " 594.0,\n",
+ " 600.0,\n",
+ " 606.0,\n",
+ " 612.0,\n",
+ " 618.0,\n",
+ " 624.0,\n",
+ " 630.0,\n",
+ " 636.0,\n",
+ " 642.0,\n",
+ " 648.0,\n",
+ " 654.0,\n",
+ " 660.0,\n",
+ " 666.0,\n",
+ " 672.0,\n",
+ " 678.0,\n",
+ " 684.0,\n",
+ " 690.0,\n",
+ " 696.0,\n",
+ " 702.0,\n",
+ " 708.0,\n",
+ " 714.0,\n",
+ " 720.0,\n",
+ " 726.0,\n",
+ " 732.0,\n",
+ " 738.0,\n",
+ " 744.0,\n",
+ " 750.0,\n",
+ " 756.0,\n",
+ " 762.0,\n",
+ " 768.0,\n",
+ " 774.0,\n",
+ " 780.0,\n",
+ " 786.0,\n",
+ " 792.0,\n",
+ " 798.0,\n",
+ " 804.0,\n",
+ " 810.0,\n",
+ " 816.0,\n",
+ " 822.0,\n",
+ " 828.0,\n",
+ " 834.0,\n",
+ " 840.0,\n",
+ " 846.0,\n",
+ " 852.0,\n",
+ " 858.0,\n",
+ " 864.0,\n",
+ " 870.0,\n",
+ " 876.0,\n",
+ " 882.0,\n",
+ " 888.0,\n",
+ " 894.0,\n",
+ " 900.0,\n",
+ " 906.0,\n",
+ " 912.0,\n",
+ " 918.0,\n",
+ " 924.0,\n",
+ " 930.0,\n",
+ " 936.0,\n",
+ " 942.0,\n",
+ " 948.0,\n",
+ " 954.0,\n",
+ " 960.0,\n",
+ " 966.0,\n",
+ " 972.0,\n",
+ " 978.0,\n",
+ " 984.0,\n",
+ " 990.0,\n",
+ " 996.0,\n",
+ " 1002.0,\n",
+ " 1008.0,\n",
+ " 1014.0,\n",
+ " 1020.0,\n",
+ " 1026.0,\n",
+ " 1032.0,\n",
+ " 1038.0,\n",
+ " 1044.0,\n",
+ " 1050.0,\n",
+ " 1056.0,\n",
+ " 1062.0,\n",
+ " 1068.0,\n",
+ " 1074.0,\n",
+ " 1080.0,\n",
+ " 1086.0,\n",
+ " 1092.0,\n",
+ " 1098.0,\n",
+ " 1104.0,\n",
+ " 1110.0,\n",
+ " 1116.0,\n",
+ " 1122.0,\n",
+ " 1128.0,\n",
+ " 1134.0,\n",
+ " 1140.0,\n",
+ " 1146.0,\n",
+ " 1152.0,\n",
+ " 1158.0,\n",
+ " 1164.0,\n",
+ " 1170.0,\n",
+ " 1176.0,\n",
+ " 1182.0,\n",
+ " 1188.0,\n",
+ " 1194.0,\n",
+ " 1200.0,\n",
+ " 1206.0,\n",
+ " 1212.0,\n",
+ " 1218.0,\n",
+ " 1224.0,\n",
+ " 1230.0,\n",
+ " 1236.0,\n",
+ " 1242.0,\n",
+ " 1248.0,\n",
+ " 1254.0,\n",
+ " 1260.0,\n",
+ " 1266.0,\n",
+ " 1272.0,\n",
+ " 1278.0,\n",
+ " 1284.0,\n",
+ " 1290.0,\n",
+ " 1296.0,\n",
+ " 1302.0,\n",
+ " 1308.0,\n",
+ " 1314.0,\n",
+ " 1320.0,\n",
+ " 1326.0,\n",
+ " 1332.0,\n",
+ " 1338.0,\n",
+ " 1344.0,\n",
+ " 1350.0,\n",
+ " 1356.0,\n",
+ " 1362.0,\n",
+ " 1368.0,\n",
+ " 1374.0,\n",
+ " 1380.0,\n",
+ " 1386.0,\n",
+ " 1392.0,\n",
+ " 1398.0,\n",
+ " 1404.0,\n",
+ " 1410.0,\n",
+ " 1416.0,\n",
+ " 1422.0,\n",
+ " 1428.0,\n",
+ " 1434.0,\n",
+ " 1440.0,\n",
+ " 1446.0\n",
+ " ],\n",
+ " \"end_offset_sec\": [\n",
+ " 6.0,\n",
+ " 12.0,\n",
+ " 18.0,\n",
+ " 24.0,\n",
+ " 30.0,\n",
+ " 36.0,\n",
+ " 42.0,\n",
+ " 48.0,\n",
+ " 54.0,\n",
+ " 60.0,\n",
+ " 66.0,\n",
+ " 72.0,\n",
+ " 78.0,\n",
+ " 84.0,\n",
+ " 90.0,\n",
+ " 96.0,\n",
+ " 102.0,\n",
+ " 108.0,\n",
+ " 114.0,\n",
+ " 120.0,\n",
+ " 126.0,\n",
+ " 132.0,\n",
+ " 138.0,\n",
+ " 144.0,\n",
+ " 150.0,\n",
+ " 156.0,\n",
+ " 162.0,\n",
+ " 168.0,\n",
+ " 174.0,\n",
+ " 180.0,\n",
+ " 186.0,\n",
+ " 192.0,\n",
+ " 198.0,\n",
+ " 204.0,\n",
+ " 210.0,\n",
+ " 216.0,\n",
+ " 222.0,\n",
+ " 228.0,\n",
+ " 234.0,\n",
+ " 240.0,\n",
+ " 246.0,\n",
+ " 252.0,\n",
+ " 258.0,\n",
+ " 264.0,\n",
+ " 270.0,\n",
+ " 276.0,\n",
+ " 282.0,\n",
+ " 288.0,\n",
+ " 294.0,\n",
+ " 300.0,\n",
+ " 306.0,\n",
+ " 312.0,\n",
+ " 318.0,\n",
+ " 324.0,\n",
+ " 330.0,\n",
+ " 336.0,\n",
+ " 342.0,\n",
+ " 348.0,\n",
+ " 354.0,\n",
+ " 360.0,\n",
+ " 366.0,\n",
+ " 372.0,\n",
+ " 378.0,\n",
+ " 384.0,\n",
+ " 390.0,\n",
+ " 396.0,\n",
+ " 402.0,\n",
+ " 408.0,\n",
+ " 414.0,\n",
+ " 420.0,\n",
+ " 426.0,\n",
+ " 432.0,\n",
+ " 438.0,\n",
+ " 444.0,\n",
+ " 450.0,\n",
+ " 456.0,\n",
+ " 462.0,\n",
+ " 468.0,\n",
+ " 474.0,\n",
+ " 480.0,\n",
+ " 486.0,\n",
+ " 492.0,\n",
+ " 498.0,\n",
+ " 504.0,\n",
+ " 510.0,\n",
+ " 516.0,\n",
+ " 522.0,\n",
+ " 528.0,\n",
+ " 534.0,\n",
+ " 540.0,\n",
+ " 546.0,\n",
+ " 552.0,\n",
+ " 558.0,\n",
+ " 564.0,\n",
+ " 570.0,\n",
+ " 576.0,\n",
+ " 582.0,\n",
+ " 588.0,\n",
+ " 594.0,\n",
+ " 600.0,\n",
+ " 606.0,\n",
+ " 612.0,\n",
+ " 618.0,\n",
+ " 624.0,\n",
+ " 630.0,\n",
+ " 636.0,\n",
+ " 642.0,\n",
+ " 648.0,\n",
+ " 654.0,\n",
+ " 660.0,\n",
+ " 666.0,\n",
+ " 672.0,\n",
+ " 678.0,\n",
+ " 684.0,\n",
+ " 690.0,\n",
+ " 696.0,\n",
+ " 702.0,\n",
+ " 708.0,\n",
+ " 714.0,\n",
+ " 720.0,\n",
+ " 726.0,\n",
+ " 732.0,\n",
+ " 738.0,\n",
+ " 744.0,\n",
+ " 750.0,\n",
+ " 756.0,\n",
+ " 762.0,\n",
+ " 768.0,\n",
+ " 774.0,\n",
+ " 780.0,\n",
+ " 786.0,\n",
+ " 792.0,\n",
+ " 798.0,\n",
+ " 804.0,\n",
+ " 810.0,\n",
+ " 816.0,\n",
+ " 822.0,\n",
+ " 828.0,\n",
+ " 834.0,\n",
+ " 840.0,\n",
+ " 846.0,\n",
+ " 852.0,\n",
+ " 858.0,\n",
+ " 864.0,\n",
+ " 870.0,\n",
+ " 876.0,\n",
+ " 882.0,\n",
+ " 888.0,\n",
+ " 894.0,\n",
+ " 900.0,\n",
+ " 906.0,\n",
+ " 912.0,\n",
+ " 918.0,\n",
+ " 924.0,\n",
+ " 930.0,\n",
+ " 936.0,\n",
+ " 942.0,\n",
+ " 948.0,\n",
+ " 954.0,\n",
+ " 960.0,\n",
+ " 966.0,\n",
+ " 972.0,\n",
+ " 978.0,\n",
+ " 984.0,\n",
+ " 990.0,\n",
+ " 996.0,\n",
+ " 1002.0,\n",
+ " 1008.0,\n",
+ " 1014.0,\n",
+ " 1020.0,\n",
+ " 1026.0,\n",
+ " 1032.0,\n",
+ " 1038.0,\n",
+ " 1044.0,\n",
+ " 1050.0,\n",
+ " 1056.0,\n",
+ " 1062.0,\n",
+ " 1068.0,\n",
+ " 1074.0,\n",
+ " 1080.0,\n",
+ " 1086.0,\n",
+ " 1092.0,\n",
+ " 1098.0,\n",
+ " 1104.0,\n",
+ " 1110.0,\n",
+ " 1116.0,\n",
+ " 1122.0,\n",
+ " 1128.0,\n",
+ " 1134.0,\n",
+ " 1140.0,\n",
+ " 1146.0,\n",
+ " 1152.0,\n",
+ " 1158.0,\n",
+ " 1164.0,\n",
+ " 1170.0,\n",
+ " 1176.0,\n",
+ " 1182.0,\n",
+ " 1188.0,\n",
+ " 1194.0,\n",
+ " 1200.0,\n",
+ " 1206.0,\n",
+ " 1212.0,\n",
+ " 1218.0,\n",
+ " 1224.0,\n",
+ " 1230.0,\n",
+ " 1236.0,\n",
+ " 1242.0,\n",
+ " 1248.0,\n",
+ " 1254.0,\n",
+ " 1260.0,\n",
+ " 1266.0,\n",
+ " 1272.0,\n",
+ " 1278.0,\n",
+ " 1284.0,\n",
+ " 1290.0,\n",
+ " 1296.0,\n",
+ " 1302.0,\n",
+ " 1308.0,\n",
+ " 1314.0,\n",
+ " 1320.0,\n",
+ " 1326.0,\n",
+ " 1332.0,\n",
+ " 1338.0,\n",
+ " 1344.0,\n",
+ " 1350.0,\n",
+ " 1356.0,\n",
+ " 1362.0,\n",
+ " 1368.0,\n",
+ " 1374.0,\n",
+ " 1380.0,\n",
+ " 1386.0,\n",
+ " 1392.0,\n",
+ " 1398.0,\n",
+ " 1404.0,\n",
+ " 1410.0,\n",
+ " 1416.0,\n",
+ " 1422.0,\n",
+ " 1428.0,\n",
+ " 1434.0,\n",
+ " 1440.0,\n",
+ " 1446.0,\n",
+ " 1448.800048828125\n",
+ " ],\n",
+ " \"summaryfeatures\": {\n",
+ " \"similarities\": {\n",
+ " \"type\": \"tensor(p{})\",\n",
+ " \"cells\": {\n",
+ " \"0\": 0.26801764965057373,\n",
+ " \"1\": 0.25517868995666504,\n",
+ " \"2\": 0.18696394562721252,\n",
+ " \"3\": 0.21889425814151764,\n",
+ " \"4\": 0.1529473513364792,\n",
+ " \"5\": 0.14913758635520935,\n",
+ " \"6\": 0.23517441749572754,\n",
+ " \"7\": 0.2353234440088272,\n",
+ " \"8\": 0.21242228150367737,\n",
+ " \"9\": 0.1885206401348114,\n",
+ " \"10\": 0.2210436314344406,\n",
+ " \"11\": 0.22923655807971954,\n",
+ " \"12\": 0.23505590856075287,\n",
+ " \"13\": 0.1909424066543579,\n",
+ " \"14\": 0.19049349427223206,\n",
+ " \"15\": 0.2125398814678192,\n",
+ " \"16\": 0.1923658549785614,\n",
+ " \"17\": 0.18145661056041718,\n",
+ " \"18\": 0.24124839901924133,\n",
+ " \"19\": 0.1750694066286087,\n",
+ " \"20\": 0.1891847550868988,\n",
+ " \"21\": 0.18880757689476013,\n",
+ " \"22\": 0.16376550495624542,\n",
+ " \"23\": 0.23028762638568878,\n",
+ " \"24\": 0.26554375886917114,\n",
+ " \"25\": 0.19489403069019318,\n",
+ " \"26\": 0.19068247079849243,\n",
+ " \"27\": 0.17556644976139069,\n",
+ " \"28\": 0.1727730631828308,\n",
+ " \"29\": 0.18818673491477966,\n",
+ " \"30\": 0.18307140469551086,\n",
+ " \"31\": 0.18322986364364624,\n",
+ " \"32\": 0.18332254886627197,\n",
+ " \"33\": 0.17737486958503723,\n",
+ " \"34\": 0.2054746448993683,\n",
+ " \"35\": 0.1936066746711731,\n",
+ " \"36\": 0.2030111700296402,\n",
+ " \"37\": 0.19140127301216125,\n",
+ " \"38\": 0.19714966416358948,\n",
+ " \"39\": 0.19746947288513184,\n",
+ " \"40\": 0.1792682260274887,\n",
+ " \"41\": 0.23100340366363525,\n",
+ " \"42\": 0.2737155854701996,\n",
+ " \"43\": 0.26417821645736694,\n",
+ " \"44\": 0.24081182479858398,\n",
+ " \"45\": 0.2589956820011139,\n",
+ " \"46\": 0.23437820374965668,\n",
+ " \"47\": 0.22961024940013885,\n",
+ " \"48\": 0.18857116997241974,\n",
+ " \"49\": 0.19216707348823547,\n",
+ " \"50\": 0.15921726822853088,\n",
+ " \"51\": 0.14634984731674194,\n",
+ " \"52\": 0.18848517537117004,\n",
+ " \"53\": 0.17523136734962463,\n",
+ " \"54\": 0.16829127073287964,\n",
+ " \"55\": 0.16766668856143951,\n",
+ " \"56\": 0.22314974665641785,\n",
+ " \"57\": 0.15624946355819702,\n",
+ " \"58\": 0.13081294298171997,\n",
+ " \"59\": 0.1503870189189911,\n",
+ " \"60\": 0.17631740868091583,\n",
+ " \"61\": 0.17871999740600586,\n",
+ " \"62\": 0.15325675904750824,\n",
+ " \"63\": 0.1618715524673462,\n",
+ " \"64\": 0.17556707561016083,\n",
+ " \"65\": 0.21091774106025696,\n",
+ " \"66\": 0.17583170533180237,\n",
+ " \"67\": 0.18759286403656006,\n",
+ " \"68\": 0.20933479070663452,\n",
+ " \"69\": 0.20815375447273254,\n",
+ " \"70\": 0.1988670527935028,\n",
+ " \"71\": 0.18068914115428925,\n",
+ " \"72\": 0.16562244296073914,\n",
+ " \"73\": 0.167647585272789,\n",
+ " \"74\": 0.17215758562088013,\n",
+ " \"75\": 0.16383600234985352,\n",
+ " \"76\": 0.18459394574165344,\n",
+ " \"77\": 0.19995972514152527,\n",
+ " \"78\": 0.1888064742088318,\n",
+ " \"79\": 0.17635037004947662,\n",
+ " \"80\": 0.20570701360702515,\n",
+ " \"81\": 0.20117110013961792,\n",
+ " \"82\": 0.13397029042243958,\n",
+ " \"83\": 0.12500479817390442,\n",
+ " \"84\": 0.10284098237752914,\n",
+ " \"85\": 0.17455658316612244,\n",
+ " \"86\": 0.20681756734848022,\n",
+ " \"87\": 0.2104944884777069,\n",
+ " \"88\": 0.18222680687904358,\n",
+ " \"89\": 0.204770028591156,\n",
+ " \"90\": 0.17314346134662628,\n",
+ " \"91\": 0.21035337448120117,\n",
+ " \"92\": 0.1841922402381897,\n",
+ " \"93\": 0.1664196252822876,\n",
+ " \"94\": 0.19048520922660828,\n",
+ " \"95\": 0.16612933576107025,\n",
+ " \"96\": 0.19302061200141907,\n",
+ " \"97\": 0.17973093688488007,\n",
+ " \"98\": 0.25269824266433716,\n",
+ " \"99\": 0.22977915406227112,\n",
+ " \"100\": 0.15784724056720734,\n",
+ " \"101\": 0.1700689196586609,\n",
+ " \"102\": 0.19267229735851288,\n",
+ " \"103\": 0.1957896649837494,\n",
+ " \"104\": 0.1793247014284134,\n",
+ " \"105\": 0.1642160564661026,\n",
+ " \"106\": 0.17960935831069946,\n",
+ " \"107\": 0.17307010293006897,\n",
+ " \"108\": 0.16801056265830994,\n",
+ " \"109\": 0.17970556020736694,\n",
+ " \"110\": 0.1971571445465088,\n",
+ " \"111\": 0.1935385763645172,\n",
+ " \"112\": 0.22264227271080017,\n",
+ " \"113\": 0.21915990114212036,\n",
+ " \"114\": 0.20777805149555206,\n",
+ " \"115\": 0.22468087077140808,\n",
+ " \"116\": 0.21406306326389313,\n",
+ " \"117\": 0.1938605159521103,\n",
+ " \"118\": 0.22885730862617493,\n",
+ " \"119\": 0.16865134239196777,\n",
+ " \"120\": 0.1872825175523758,\n",
+ " \"121\": 0.20210419595241547,\n",
+ " \"122\": 0.2006729543209076,\n",
+ " \"123\": 0.17872017621994019,\n",
+ " \"124\": 0.18020905554294586,\n",
+ " \"125\": 0.18884027004241943,\n",
+ " \"126\": 0.19896523654460907,\n",
+ " \"127\": 0.21124014258384705,\n",
+ " \"128\": 0.18319082260131836,\n",
+ " \"129\": 0.19869163632392883,\n",
+ " \"130\": 0.21987730264663696,\n",
+ " \"131\": 0.24737203121185303,\n",
+ " \"132\": 0.2196074277162552,\n",
+ " \"133\": 0.1948014348745346,\n",
+ " \"134\": 0.20482322573661804,\n",
+ " \"135\": 0.20723658800125122,\n",
+ " \"136\": 0.20293781161308289,\n",
+ " \"137\": 0.23808544874191284,\n",
+ " \"138\": 0.21687531471252441,\n",
+ " \"139\": 0.18854688107967377,\n",
+ " \"140\": 0.23239216208457947,\n",
+ " \"141\": 0.20674535632133484,\n",
+ " \"142\": 0.2039334923028946,\n",
+ " \"143\": 0.2214721441268921,\n",
+ " \"144\": 0.17064169049263,\n",
+ " \"145\": 0.17559164762496948,\n",
+ " \"146\": 0.18758957087993622,\n",
+ " \"147\": 0.2586555480957031,\n",
+ " \"148\": 0.2239076793193817,\n",
+ " \"149\": 0.1858271211385727,\n",
+ " \"150\": 0.18375852704048157,\n",
+ " \"151\": 0.16894754767417908,\n",
+ " \"152\": 0.17636868357658386,\n",
+ " \"153\": 0.1585116684436798,\n",
+ " \"154\": 0.20884966850280762,\n",
+ " \"155\": 0.19982419908046722,\n",
+ " \"156\": 0.19640682637691498,\n",
+ " \"157\": 0.205767422914505,\n",
+ " \"158\": 0.18988652527332306,\n",
+ " \"159\": 0.16192299127578735,\n",
+ " \"160\": 0.23090824484825134,\n",
+ " \"161\": 0.19387517869472504,\n",
+ " \"162\": 0.1778217852115631,\n",
+ " \"163\": 0.19742624461650848,\n",
+ " \"164\": 0.1976637989282608,\n",
+ " \"165\": 0.17239904403686523,\n",
+ " \"166\": 0.17611661553382874,\n",
+ " \"167\": 0.18090175092220306,\n",
+ " \"168\": 0.18562501668930054,\n",
+ " \"169\": 0.1913011372089386,\n",
+ " \"170\": 0.17979900538921356,\n",
+ " \"171\": 0.16499203443527222,\n",
+ " \"172\": 0.16726429760456085,\n",
+ " \"173\": 0.17212313413619995,\n",
+ " \"174\": 0.1911279559135437,\n",
+ " \"175\": 0.19671502709388733,\n",
+ " \"176\": 0.18979108333587646,\n",
+ " \"177\": 0.2068617343902588,\n",
+ " \"178\": 0.2212362289428711,\n",
+ " \"179\": 0.20381945371627808,\n",
+ " \"180\": 0.18463441729545593,\n",
+ " \"181\": 0.20046135783195496,\n",
+ " \"182\": 0.1857784539461136,\n",
+ " \"183\": 0.17102620005607605,\n",
+ " \"184\": 0.19620634615421295,\n",
+ " \"185\": 0.24402475357055664,\n",
+ " \"186\": 0.27124571800231934,\n",
+ " \"187\": 0.271205872297287,\n",
+ " \"188\": 0.25007861852645874,\n",
+ " \"189\": 0.2618427276611328,\n",
+ " \"190\": 0.258881151676178,\n",
+ " \"191\": 0.2477029263973236,\n",
+ " \"192\": 0.27978116273880005,\n",
+ " \"193\": 0.22998890280723572,\n",
+ " \"194\": 0.1600525677204132,\n",
+ " \"195\": 0.20765827596187592,\n",
+ " \"196\": 0.26462453603744507,\n",
+ " \"197\": 0.2529928982257843,\n",
+ " \"198\": 0.2546486258506775,\n",
+ " \"199\": 0.3909624218940735,\n",
+ " \"200\": 0.3817324638366699,\n",
+ " \"201\": 0.3716684579849243,\n",
+ " \"202\": 0.38000035285949707,\n",
+ " \"203\": 0.40329158306121826,\n",
+ " \"204\": 0.211330845952034,\n",
+ " \"205\": 0.291744202375412,\n",
+ " \"206\": 0.36191847920417786,\n",
+ " \"207\": 0.391671359539032,\n",
+ " \"208\": 0.40864360332489014,\n",
+ " \"209\": 0.40476706624031067,\n",
+ " \"210\": 0.4112417697906494,\n",
+ " \"211\": 0.40934425592422485,\n",
+ " \"212\": 0.43537065386772156,\n",
+ " \"213\": 0.3644255995750427,\n",
+ " \"214\": 0.3053430914878845,\n",
+ " \"215\": 0.2679027020931244,\n",
+ " \"216\": 0.3584190607070923,\n",
+ " \"217\": 0.3512270152568817,\n",
+ " \"218\": 0.2889820337295532,\n",
+ " \"219\": 0.271373987197876,\n",
+ " \"220\": 0.3240889310836792,\n",
+ " \"221\": 0.2890607714653015,\n",
+ " \"222\": 0.2631019651889801,\n",
+ " \"223\": 0.286709725856781,\n",
+ " \"224\": 0.290524423122406,\n",
+ " \"225\": 0.3164805471897125,\n",
+ " \"226\": 0.32039016485214233,\n",
+ " \"227\": 0.3310452699661255,\n",
+ " \"228\": 0.3357622027397156,\n",
+ " \"229\": 0.40372908115386963,\n",
+ " \"230\": 0.41800743341445923,\n",
+ " \"231\": 0.40599969029426575,\n",
+ " \"232\": 0.3756481409072876,\n",
+ " \"233\": 0.2771507501602173,\n",
+ " \"234\": 0.2839912176132202,\n",
+ " \"235\": 0.2823885977268219,\n",
+ " \"236\": 0.27640271186828613,\n",
+ " \"237\": 0.2688301205635071,\n",
+ " \"238\": 0.27843406796455383,\n",
+ " \"239\": 0.28141558170318604,\n",
+ " \"240\": 0.2451176792383194,\n",
+ " \"241\": 0.16267892718315125\n",
+ " }\n",
+ " },\n",
+ " \"vespa.summaryFeatures.cached\": 0.0\n",
+ " }\n",
+ " }\n",
+ "}\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'root': {'id': 'toplevel',\n",
+ " 'relevance': 1.0,\n",
+ " 'fields': {'totalCount': 3},\n",
+ " 'coverage': {'coverage': 100,\n",
+ " 'documents': 3,\n",
+ " 'full': True,\n",
+ " 'nodes': 1,\n",
+ " 'results': 1,\n",
+ " 'resultsFull': 1},\n",
+ " 'children': [{'id': 'id:videos:videos::13bcb994b389c9d925993e611877e40b',\n",
+ " 'relevance': 0.47162757625475055,\n",
+ " 'source': 'videosearch_content',\n",
+ " 'fields': {'matchfeatures': {'closest(embeddings)': {'type': 'tensor(p{})',\n",
+ " 'cells': {'212': 1.0}}},\n",
+ " 'sddocname': 'videos',\n",
+ " 'documentid': 'id:videos:videos::13bcb994b389c9d925993e611877e40b',\n",
+ " 'video_url': 'https://ia601401.us.archive.org/1/items/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net.mp4',\n",
+ " 'title': 'Twas the night before Christmas',\n",
+ " 'keywords': 'Christmas, Santa Claus, Animated Special, Holiday Adventure, Teamwork',\n",
+ " 'video_summary': 'In Junctionville, Santa cancels Christmas after receiving an anonymous letter claiming he doesn’t exist. Joshua Trundle and Father Mouse create a special clock tower to win Santa back. Albert Mouse accidentally breaks the clock mechanism, causing public embarrassment. Feeling remorseful, Albert repairs the clock just before Christmas Eve. The clock’s magical song convinces Santa to return and restore Christmas.',\n",
+ " 'embedding_scope': 'clip',\n",
+ " 'start_offset_sec': [0.0,\n",
+ " 6.0,\n",
+ " 12.0,\n",
+ " 18.0,\n",
+ " 24.0,\n",
+ " 30.0,\n",
+ " 36.0,\n",
+ " 42.0,\n",
+ " 48.0,\n",
+ " 54.0,\n",
+ " 60.0,\n",
+ " 66.0,\n",
+ " 72.0,\n",
+ " 78.0,\n",
+ " 84.0,\n",
+ " 90.0,\n",
+ " 96.0,\n",
+ " 102.0,\n",
+ " 108.0,\n",
+ " 114.0,\n",
+ " 120.0,\n",
+ " 126.0,\n",
+ " 132.0,\n",
+ " 138.0,\n",
+ " 144.0,\n",
+ " 150.0,\n",
+ " 156.0,\n",
+ " 162.0,\n",
+ " 168.0,\n",
+ " 174.0,\n",
+ " 180.0,\n",
+ " 186.0,\n",
+ " 192.0,\n",
+ " 198.0,\n",
+ " 204.0,\n",
+ " 210.0,\n",
+ " 216.0,\n",
+ " 222.0,\n",
+ " 228.0,\n",
+ " 234.0,\n",
+ " 240.0,\n",
+ " 246.0,\n",
+ " 252.0,\n",
+ " 258.0,\n",
+ " 264.0,\n",
+ " 270.0,\n",
+ " 276.0,\n",
+ " 282.0,\n",
+ " 288.0,\n",
+ " 294.0,\n",
+ " 300.0,\n",
+ " 306.0,\n",
+ " 312.0,\n",
+ " 318.0,\n",
+ " 324.0,\n",
+ " 330.0,\n",
+ " 336.0,\n",
+ " 342.0,\n",
+ " 348.0,\n",
+ " 354.0,\n",
+ " 360.0,\n",
+ " 366.0,\n",
+ " 372.0,\n",
+ " 378.0,\n",
+ " 384.0,\n",
+ " 390.0,\n",
+ " 396.0,\n",
+ " 402.0,\n",
+ " 408.0,\n",
+ " 414.0,\n",
+ " 420.0,\n",
+ " 426.0,\n",
+ " 432.0,\n",
+ " 438.0,\n",
+ " 444.0,\n",
+ " 450.0,\n",
+ " 456.0,\n",
+ " 462.0,\n",
+ " 468.0,\n",
+ " 474.0,\n",
+ " 480.0,\n",
+ " 486.0,\n",
+ " 492.0,\n",
+ " 498.0,\n",
+ " 504.0,\n",
+ " 510.0,\n",
+ " 516.0,\n",
+ " 522.0,\n",
+ " 528.0,\n",
+ " 534.0,\n",
+ " 540.0,\n",
+ " 546.0,\n",
+ " 552.0,\n",
+ " 558.0,\n",
+ " 564.0,\n",
+ " 570.0,\n",
+ " 576.0,\n",
+ " 582.0,\n",
+ " 588.0,\n",
+ " 594.0,\n",
+ " 600.0,\n",
+ " 606.0,\n",
+ " 612.0,\n",
+ " 618.0,\n",
+ " 624.0,\n",
+ " 630.0,\n",
+ " 636.0,\n",
+ " 642.0,\n",
+ " 648.0,\n",
+ " 654.0,\n",
+ " 660.0,\n",
+ " 666.0,\n",
+ " 672.0,\n",
+ " 678.0,\n",
+ " 684.0,\n",
+ " 690.0,\n",
+ " 696.0,\n",
+ " 702.0,\n",
+ " 708.0,\n",
+ " 714.0,\n",
+ " 720.0,\n",
+ " 726.0,\n",
+ " 732.0,\n",
+ " 738.0,\n",
+ " 744.0,\n",
+ " 750.0,\n",
+ " 756.0,\n",
+ " 762.0,\n",
+ " 768.0,\n",
+ " 774.0,\n",
+ " 780.0,\n",
+ " 786.0,\n",
+ " 792.0,\n",
+ " 798.0,\n",
+ " 804.0,\n",
+ " 810.0,\n",
+ " 816.0,\n",
+ " 822.0,\n",
+ " 828.0,\n",
+ " 834.0,\n",
+ " 840.0,\n",
+ " 846.0,\n",
+ " 852.0,\n",
+ " 858.0,\n",
+ " 864.0,\n",
+ " 870.0,\n",
+ " 876.0,\n",
+ " 882.0,\n",
+ " 888.0,\n",
+ " 894.0,\n",
+ " 900.0,\n",
+ " 906.0,\n",
+ " 912.0,\n",
+ " 918.0,\n",
+ " 924.0,\n",
+ " 930.0,\n",
+ " 936.0,\n",
+ " 942.0,\n",
+ " 948.0,\n",
+ " 954.0,\n",
+ " 960.0,\n",
+ " 966.0,\n",
+ " 972.0,\n",
+ " 978.0,\n",
+ " 984.0,\n",
+ " 990.0,\n",
+ " 996.0,\n",
+ " 1002.0,\n",
+ " 1008.0,\n",
+ " 1014.0,\n",
+ " 1020.0,\n",
+ " 1026.0,\n",
+ " 1032.0,\n",
+ " 1038.0,\n",
+ " 1044.0,\n",
+ " 1050.0,\n",
+ " 1056.0,\n",
+ " 1062.0,\n",
+ " 1068.0,\n",
+ " 1074.0,\n",
+ " 1080.0,\n",
+ " 1086.0,\n",
+ " 1092.0,\n",
+ " 1098.0,\n",
+ " 1104.0,\n",
+ " 1110.0,\n",
+ " 1116.0,\n",
+ " 1122.0,\n",
+ " 1128.0,\n",
+ " 1134.0,\n",
+ " 1140.0,\n",
+ " 1146.0,\n",
+ " 1152.0,\n",
+ " 1158.0,\n",
+ " 1164.0,\n",
+ " 1170.0,\n",
+ " 1176.0,\n",
+ " 1182.0,\n",
+ " 1188.0,\n",
+ " 1194.0,\n",
+ " 1200.0,\n",
+ " 1206.0,\n",
+ " 1212.0,\n",
+ " 1218.0,\n",
+ " 1224.0,\n",
+ " 1230.0,\n",
+ " 1236.0,\n",
+ " 1242.0,\n",
+ " 1248.0,\n",
+ " 1254.0,\n",
+ " 1260.0,\n",
+ " 1266.0,\n",
+ " 1272.0,\n",
+ " 1278.0,\n",
+ " 1284.0,\n",
+ " 1290.0,\n",
+ " 1296.0,\n",
+ " 1302.0,\n",
+ " 1308.0,\n",
+ " 1314.0,\n",
+ " 1320.0,\n",
+ " 1326.0,\n",
+ " 1332.0,\n",
+ " 1338.0,\n",
+ " 1344.0,\n",
+ " 1350.0,\n",
+ " 1356.0,\n",
+ " 1362.0,\n",
+ " 1368.0,\n",
+ " 1374.0,\n",
+ " 1380.0,\n",
+ " 1386.0,\n",
+ " 1392.0,\n",
+ " 1398.0,\n",
+ " 1404.0,\n",
+ " 1410.0,\n",
+ " 1416.0,\n",
+ " 1422.0,\n",
+ " 1428.0,\n",
+ " 1434.0,\n",
+ " 1440.0,\n",
+ " 1446.0],\n",
+ " 'end_offset_sec': [6.0,\n",
+ " 12.0,\n",
+ " 18.0,\n",
+ " 24.0,\n",
+ " 30.0,\n",
+ " 36.0,\n",
+ " 42.0,\n",
+ " 48.0,\n",
+ " 54.0,\n",
+ " 60.0,\n",
+ " 66.0,\n",
+ " 72.0,\n",
+ " 78.0,\n",
+ " 84.0,\n",
+ " 90.0,\n",
+ " 96.0,\n",
+ " 102.0,\n",
+ " 108.0,\n",
+ " 114.0,\n",
+ " 120.0,\n",
+ " 126.0,\n",
+ " 132.0,\n",
+ " 138.0,\n",
+ " 144.0,\n",
+ " 150.0,\n",
+ " 156.0,\n",
+ " 162.0,\n",
+ " 168.0,\n",
+ " 174.0,\n",
+ " 180.0,\n",
+ " 186.0,\n",
+ " 192.0,\n",
+ " 198.0,\n",
+ " 204.0,\n",
+ " 210.0,\n",
+ " 216.0,\n",
+ " 222.0,\n",
+ " 228.0,\n",
+ " 234.0,\n",
+ " 240.0,\n",
+ " 246.0,\n",
+ " 252.0,\n",
+ " 258.0,\n",
+ " 264.0,\n",
+ " 270.0,\n",
+ " 276.0,\n",
+ " 282.0,\n",
+ " 288.0,\n",
+ " 294.0,\n",
+ " 300.0,\n",
+ " 306.0,\n",
+ " 312.0,\n",
+ " 318.0,\n",
+ " 324.0,\n",
+ " 330.0,\n",
+ " 336.0,\n",
+ " 342.0,\n",
+ " 348.0,\n",
+ " 354.0,\n",
+ " 360.0,\n",
+ " 366.0,\n",
+ " 372.0,\n",
+ " 378.0,\n",
+ " 384.0,\n",
+ " 390.0,\n",
+ " 396.0,\n",
+ " 402.0,\n",
+ " 408.0,\n",
+ " 414.0,\n",
+ " 420.0,\n",
+ " 426.0,\n",
+ " 432.0,\n",
+ " 438.0,\n",
+ " 444.0,\n",
+ " 450.0,\n",
+ " 456.0,\n",
+ " 462.0,\n",
+ " 468.0,\n",
+ " 474.0,\n",
+ " 480.0,\n",
+ " 486.0,\n",
+ " 492.0,\n",
+ " 498.0,\n",
+ " 504.0,\n",
+ " 510.0,\n",
+ " 516.0,\n",
+ " 522.0,\n",
+ " 528.0,\n",
+ " 534.0,\n",
+ " 540.0,\n",
+ " 546.0,\n",
+ " 552.0,\n",
+ " 558.0,\n",
+ " 564.0,\n",
+ " 570.0,\n",
+ " 576.0,\n",
+ " 582.0,\n",
+ " 588.0,\n",
+ " 594.0,\n",
+ " 600.0,\n",
+ " 606.0,\n",
+ " 612.0,\n",
+ " 618.0,\n",
+ " 624.0,\n",
+ " 630.0,\n",
+ " 636.0,\n",
+ " 642.0,\n",
+ " 648.0,\n",
+ " 654.0,\n",
+ " 660.0,\n",
+ " 666.0,\n",
+ " 672.0,\n",
+ " 678.0,\n",
+ " 684.0,\n",
+ " 690.0,\n",
+ " 696.0,\n",
+ " 702.0,\n",
+ " 708.0,\n",
+ " 714.0,\n",
+ " 720.0,\n",
+ " 726.0,\n",
+ " 732.0,\n",
+ " 738.0,\n",
+ " 744.0,\n",
+ " 750.0,\n",
+ " 756.0,\n",
+ " 762.0,\n",
+ " 768.0,\n",
+ " 774.0,\n",
+ " 780.0,\n",
+ " 786.0,\n",
+ " 792.0,\n",
+ " 798.0,\n",
+ " 804.0,\n",
+ " 810.0,\n",
+ " 816.0,\n",
+ " 822.0,\n",
+ " 828.0,\n",
+ " 834.0,\n",
+ " 840.0,\n",
+ " 846.0,\n",
+ " 852.0,\n",
+ " 858.0,\n",
+ " 864.0,\n",
+ " 870.0,\n",
+ " 876.0,\n",
+ " 882.0,\n",
+ " 888.0,\n",
+ " 894.0,\n",
+ " 900.0,\n",
+ " 906.0,\n",
+ " 912.0,\n",
+ " 918.0,\n",
+ " 924.0,\n",
+ " 930.0,\n",
+ " 936.0,\n",
+ " 942.0,\n",
+ " 948.0,\n",
+ " 954.0,\n",
+ " 960.0,\n",
+ " 966.0,\n",
+ " 972.0,\n",
+ " 978.0,\n",
+ " 984.0,\n",
+ " 990.0,\n",
+ " 996.0,\n",
+ " 1002.0,\n",
+ " 1008.0,\n",
+ " 1014.0,\n",
+ " 1020.0,\n",
+ " 1026.0,\n",
+ " 1032.0,\n",
+ " 1038.0,\n",
+ " 1044.0,\n",
+ " 1050.0,\n",
+ " 1056.0,\n",
+ " 1062.0,\n",
+ " 1068.0,\n",
+ " 1074.0,\n",
+ " 1080.0,\n",
+ " 1086.0,\n",
+ " 1092.0,\n",
+ " 1098.0,\n",
+ " 1104.0,\n",
+ " 1110.0,\n",
+ " 1116.0,\n",
+ " 1122.0,\n",
+ " 1128.0,\n",
+ " 1134.0,\n",
+ " 1140.0,\n",
+ " 1146.0,\n",
+ " 1152.0,\n",
+ " 1158.0,\n",
+ " 1164.0,\n",
+ " 1170.0,\n",
+ " 1176.0,\n",
+ " 1182.0,\n",
+ " 1188.0,\n",
+ " 1194.0,\n",
+ " 1200.0,\n",
+ " 1206.0,\n",
+ " 1212.0,\n",
+ " 1218.0,\n",
+ " 1224.0,\n",
+ " 1230.0,\n",
+ " 1236.0,\n",
+ " 1242.0,\n",
+ " 1248.0,\n",
+ " 1254.0,\n",
+ " 1260.0,\n",
+ " 1266.0,\n",
+ " 1272.0,\n",
+ " 1278.0,\n",
+ " 1284.0,\n",
+ " 1290.0,\n",
+ " 1296.0,\n",
+ " 1302.0,\n",
+ " 1308.0,\n",
+ " 1314.0,\n",
+ " 1320.0,\n",
+ " 1326.0,\n",
+ " 1332.0,\n",
+ " 1338.0,\n",
+ " 1344.0,\n",
+ " 1350.0,\n",
+ " 1356.0,\n",
+ " 1362.0,\n",
+ " 1368.0,\n",
+ " 1374.0,\n",
+ " 1380.0,\n",
+ " 1386.0,\n",
+ " 1392.0,\n",
+ " 1398.0,\n",
+ " 1404.0,\n",
+ " 1410.0,\n",
+ " 1416.0,\n",
+ " 1422.0,\n",
+ " 1428.0,\n",
+ " 1434.0,\n",
+ " 1440.0,\n",
+ " 1446.0,\n",
+ " 1448.800048828125],\n",
+ " 'summaryfeatures': {'similarities': {'type': 'tensor(p{})',\n",
+ " 'cells': {'0': 0.26801764965057373,\n",
+ " '1': 0.25517868995666504,\n",
+ " '2': 0.18696394562721252,\n",
+ " '3': 0.21889425814151764,\n",
+ " '4': 0.1529473513364792,\n",
+ " '5': 0.14913758635520935,\n",
+ " '6': 0.23517441749572754,\n",
+ " '7': 0.2353234440088272,\n",
+ " '8': 0.21242228150367737,\n",
+ " '9': 0.1885206401348114,\n",
+ " '10': 0.2210436314344406,\n",
+ " '11': 0.22923655807971954,\n",
+ " '12': 0.23505590856075287,\n",
+ " '13': 0.1909424066543579,\n",
+ " '14': 0.19049349427223206,\n",
+ " '15': 0.2125398814678192,\n",
+ " '16': 0.1923658549785614,\n",
+ " '17': 0.18145661056041718,\n",
+ " '18': 0.24124839901924133,\n",
+ " '19': 0.1750694066286087,\n",
+ " '20': 0.1891847550868988,\n",
+ " '21': 0.18880757689476013,\n",
+ " '22': 0.16376550495624542,\n",
+ " '23': 0.23028762638568878,\n",
+ " '24': 0.26554375886917114,\n",
+ " '25': 0.19489403069019318,\n",
+ " '26': 0.19068247079849243,\n",
+ " '27': 0.17556644976139069,\n",
+ " '28': 0.1727730631828308,\n",
+ " '29': 0.18818673491477966,\n",
+ " '30': 0.18307140469551086,\n",
+ " '31': 0.18322986364364624,\n",
+ " '32': 0.18332254886627197,\n",
+ " '33': 0.17737486958503723,\n",
+ " '34': 0.2054746448993683,\n",
+ " '35': 0.1936066746711731,\n",
+ " '36': 0.2030111700296402,\n",
+ " '37': 0.19140127301216125,\n",
+ " '38': 0.19714966416358948,\n",
+ " '39': 0.19746947288513184,\n",
+ " '40': 0.1792682260274887,\n",
+ " '41': 0.23100340366363525,\n",
+ " '42': 0.2737155854701996,\n",
+ " '43': 0.26417821645736694,\n",
+ " '44': 0.24081182479858398,\n",
+ " '45': 0.2589956820011139,\n",
+ " '46': 0.23437820374965668,\n",
+ " '47': 0.22961024940013885,\n",
+ " '48': 0.18857116997241974,\n",
+ " '49': 0.19216707348823547,\n",
+ " '50': 0.15921726822853088,\n",
+ " '51': 0.14634984731674194,\n",
+ " '52': 0.18848517537117004,\n",
+ " '53': 0.17523136734962463,\n",
+ " '54': 0.16829127073287964,\n",
+ " '55': 0.16766668856143951,\n",
+ " '56': 0.22314974665641785,\n",
+ " '57': 0.15624946355819702,\n",
+ " '58': 0.13081294298171997,\n",
+ " '59': 0.1503870189189911,\n",
+ " '60': 0.17631740868091583,\n",
+ " '61': 0.17871999740600586,\n",
+ " '62': 0.15325675904750824,\n",
+ " '63': 0.1618715524673462,\n",
+ " '64': 0.17556707561016083,\n",
+ " '65': 0.21091774106025696,\n",
+ " '66': 0.17583170533180237,\n",
+ " '67': 0.18759286403656006,\n",
+ " '68': 0.20933479070663452,\n",
+ " '69': 0.20815375447273254,\n",
+ " '70': 0.1988670527935028,\n",
+ " '71': 0.18068914115428925,\n",
+ " '72': 0.16562244296073914,\n",
+ " '73': 0.167647585272789,\n",
+ " '74': 0.17215758562088013,\n",
+ " '75': 0.16383600234985352,\n",
+ " '76': 0.18459394574165344,\n",
+ " '77': 0.19995972514152527,\n",
+ " '78': 0.1888064742088318,\n",
+ " '79': 0.17635037004947662,\n",
+ " '80': 0.20570701360702515,\n",
+ " '81': 0.20117110013961792,\n",
+ " '82': 0.13397029042243958,\n",
+ " '83': 0.12500479817390442,\n",
+ " '84': 0.10284098237752914,\n",
+ " '85': 0.17455658316612244,\n",
+ " '86': 0.20681756734848022,\n",
+ " '87': 0.2104944884777069,\n",
+ " '88': 0.18222680687904358,\n",
+ " '89': 0.204770028591156,\n",
+ " '90': 0.17314346134662628,\n",
+ " '91': 0.21035337448120117,\n",
+ " '92': 0.1841922402381897,\n",
+ " '93': 0.1664196252822876,\n",
+ " '94': 0.19048520922660828,\n",
+ " '95': 0.16612933576107025,\n",
+ " '96': 0.19302061200141907,\n",
+ " '97': 0.17973093688488007,\n",
+ " '98': 0.25269824266433716,\n",
+ " '99': 0.22977915406227112,\n",
+ " '100': 0.15784724056720734,\n",
+ " '101': 0.1700689196586609,\n",
+ " '102': 0.19267229735851288,\n",
+ " '103': 0.1957896649837494,\n",
+ " '104': 0.1793247014284134,\n",
+ " '105': 0.1642160564661026,\n",
+ " '106': 0.17960935831069946,\n",
+ " '107': 0.17307010293006897,\n",
+ " '108': 0.16801056265830994,\n",
+ " '109': 0.17970556020736694,\n",
+ " '110': 0.1971571445465088,\n",
+ " '111': 0.1935385763645172,\n",
+ " '112': 0.22264227271080017,\n",
+ " '113': 0.21915990114212036,\n",
+ " '114': 0.20777805149555206,\n",
+ " '115': 0.22468087077140808,\n",
+ " '116': 0.21406306326389313,\n",
+ " '117': 0.1938605159521103,\n",
+ " '118': 0.22885730862617493,\n",
+ " '119': 0.16865134239196777,\n",
+ " '120': 0.1872825175523758,\n",
+ " '121': 0.20210419595241547,\n",
+ " '122': 0.2006729543209076,\n",
+ " '123': 0.17872017621994019,\n",
+ " '124': 0.18020905554294586,\n",
+ " '125': 0.18884027004241943,\n",
+ " '126': 0.19896523654460907,\n",
+ " '127': 0.21124014258384705,\n",
+ " '128': 0.18319082260131836,\n",
+ " '129': 0.19869163632392883,\n",
+ " '130': 0.21987730264663696,\n",
+ " '131': 0.24737203121185303,\n",
+ " '132': 0.2196074277162552,\n",
+ " '133': 0.1948014348745346,\n",
+ " '134': 0.20482322573661804,\n",
+ " '135': 0.20723658800125122,\n",
+ " '136': 0.20293781161308289,\n",
+ " '137': 0.23808544874191284,\n",
+ " '138': 0.21687531471252441,\n",
+ " '139': 0.18854688107967377,\n",
+ " '140': 0.23239216208457947,\n",
+ " '141': 0.20674535632133484,\n",
+ " '142': 0.2039334923028946,\n",
+ " '143': 0.2214721441268921,\n",
+ " '144': 0.17064169049263,\n",
+ " '145': 0.17559164762496948,\n",
+ " '146': 0.18758957087993622,\n",
+ " '147': 0.2586555480957031,\n",
+ " '148': 0.2239076793193817,\n",
+ " '149': 0.1858271211385727,\n",
+ " '150': 0.18375852704048157,\n",
+ " '151': 0.16894754767417908,\n",
+ " '152': 0.17636868357658386,\n",
+ " '153': 0.1585116684436798,\n",
+ " '154': 0.20884966850280762,\n",
+ " '155': 0.19982419908046722,\n",
+ " '156': 0.19640682637691498,\n",
+ " '157': 0.205767422914505,\n",
+ " '158': 0.18988652527332306,\n",
+ " '159': 0.16192299127578735,\n",
+ " '160': 0.23090824484825134,\n",
+ " '161': 0.19387517869472504,\n",
+ " '162': 0.1778217852115631,\n",
+ " '163': 0.19742624461650848,\n",
+ " '164': 0.1976637989282608,\n",
+ " '165': 0.17239904403686523,\n",
+ " '166': 0.17611661553382874,\n",
+ " '167': 0.18090175092220306,\n",
+ " '168': 0.18562501668930054,\n",
+ " '169': 0.1913011372089386,\n",
+ " '170': 0.17979900538921356,\n",
+ " '171': 0.16499203443527222,\n",
+ " '172': 0.16726429760456085,\n",
+ " '173': 0.17212313413619995,\n",
+ " '174': 0.1911279559135437,\n",
+ " '175': 0.19671502709388733,\n",
+ " '176': 0.18979108333587646,\n",
+ " '177': 0.2068617343902588,\n",
+ " '178': 0.2212362289428711,\n",
+ " '179': 0.20381945371627808,\n",
+ " '180': 0.18463441729545593,\n",
+ " '181': 0.20046135783195496,\n",
+ " '182': 0.1857784539461136,\n",
+ " '183': 0.17102620005607605,\n",
+ " '184': 0.19620634615421295,\n",
+ " '185': 0.24402475357055664,\n",
+ " '186': 0.27124571800231934,\n",
+ " '187': 0.271205872297287,\n",
+ " '188': 0.25007861852645874,\n",
+ " '189': 0.2618427276611328,\n",
+ " '190': 0.258881151676178,\n",
+ " '191': 0.2477029263973236,\n",
+ " '192': 0.27978116273880005,\n",
+ " '193': 0.22998890280723572,\n",
+ " '194': 0.1600525677204132,\n",
+ " '195': 0.20765827596187592,\n",
+ " '196': 0.26462453603744507,\n",
+ " '197': 0.2529928982257843,\n",
+ " '198': 0.2546486258506775,\n",
+ " '199': 0.3909624218940735,\n",
+ " '200': 0.3817324638366699,\n",
+ " '201': 0.3716684579849243,\n",
+ " '202': 0.38000035285949707,\n",
+ " '203': 0.40329158306121826,\n",
+ " '204': 0.211330845952034,\n",
+ " '205': 0.291744202375412,\n",
+ " '206': 0.36191847920417786,\n",
+ " '207': 0.391671359539032,\n",
+ " '208': 0.40864360332489014,\n",
+ " '209': 0.40476706624031067,\n",
+ " '210': 0.4112417697906494,\n",
+ " '211': 0.40934425592422485,\n",
+ " '212': 0.43537065386772156,\n",
+ " '213': 0.3644255995750427,\n",
+ " '214': 0.3053430914878845,\n",
+ " '215': 0.2679027020931244,\n",
+ " '216': 0.3584190607070923,\n",
+ " '217': 0.3512270152568817,\n",
+ " '218': 0.2889820337295532,\n",
+ " '219': 0.271373987197876,\n",
+ " '220': 0.3240889310836792,\n",
+ " '221': 0.2890607714653015,\n",
+ " '222': 0.2631019651889801,\n",
+ " '223': 0.286709725856781,\n",
+ " '224': 0.290524423122406,\n",
+ " '225': 0.3164805471897125,\n",
+ " '226': 0.32039016485214233,\n",
+ " '227': 0.3310452699661255,\n",
+ " '228': 0.3357622027397156,\n",
+ " '229': 0.40372908115386963,\n",
+ " '230': 0.41800743341445923,\n",
+ " '231': 0.40599969029426575,\n",
+ " '232': 0.3756481409072876,\n",
+ " '233': 0.2771507501602173,\n",
+ " '234': 0.2839912176132202,\n",
+ " '235': 0.2823885977268219,\n",
+ " '236': 0.27640271186828613,\n",
+ " '237': 0.2688301205635071,\n",
+ " '238': 0.27843406796455383,\n",
+ " '239': 0.28141558170318604,\n",
+ " '240': 0.2451176792383194,\n",
+ " '241': 0.16267892718315125}},\n",
+ " 'vespa.summaryFeatures.cached': 0.0}}}]}}"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with app.syncio(connections=1) as session:\n",
+ " response: VespaQueryResponse = session.query(\n",
+ " yql=\"select * from videos where userQuery() OR ({targetHits:100}nearestNeighbor(embeddings,q))\",\n",
+ " query=user_query,\n",
+ " ranking=\"hybrid\",\n",
+ " hits=1,\n",
+ " body={\"input.query(q)\": q_embedding},\n",
+ " )\n",
+ " assert response.is_successful()\n",
+ "\n",
+ "for hit in response.hits:\n",
+ " print(json.dumps(hit, indent=4))\n",
+ "\n",
+ "response.get_json()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bnbWlc62nhyb",
+ "metadata": {
+ "id": "bnbWlc62nhyb"
+ },
+ "source": [
+ "To make the results above easier to consume, we can load the per-segment similarities into a pandas DataFrame and pick out the top N segments by similarity score:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "vGDIF53MvkdR",
+ "metadata": {
+ "id": "vGDIF53MvkdR"
+ },
+ "outputs": [],
+ "source": [
+ "def get_top_n_similarity_matches(data, N=5):\n",
+ " \"\"\"\n",
+ " Function to extract the top N similarity scores and their corresponding start and end offsets.\n",
+ "\n",
+ " Args:\n",
+ " - data (dict): Input JSON-like structure containing similarities and offsets.\n",
+ " - N (int): The number of top similarity scores to return.\n",
+ "\n",
+ " Returns:\n",
+ " - pd.DataFrame: A DataFrame with the top N similarity scores and their corresponding offsets.\n",
+ " \"\"\"\n",
+ " # Extract relevant fields\n",
+ " similarities = data[\"fields\"][\"summaryfeatures\"][\"similarities\"][\"cells\"]\n",
+ " start_offset_sec = data[\"fields\"][\"start_offset_sec\"]\n",
+ " end_offset_sec = data[\"fields\"][\"end_offset_sec\"]\n",
+ "\n",
+ " # Convert similarity scores to a list of tuples (index, similarity_score) and sort by similarity score\n",
+ " sorted_similarities = sorted(similarities.items(), key=lambda x: x[1], reverse=True)\n",
+ "\n",
+ " # Extract top N similarity scores\n",
+ " top_n_similarities = sorted_similarities[:N]\n",
+ "\n",
+ " # Prepare results\n",
+ " results = []\n",
+ " for index_str, score in top_n_similarities:\n",
+ " index = int(index_str)\n",
+ " if index < len(start_offset_sec):\n",
+ " result = {\n",
+ " \"index\": index,\n",
+ " \"similarity_score\": score,\n",
+ " \"start_offset_sec\": start_offset_sec[index],\n",
+ " \"end_offset_sec\": end_offset_sec[index],\n",
+ " }\n",
+ " else:\n",
+ " result = {\n",
+ " \"index\": index,\n",
+ " \"similarity_score\": score,\n",
+ " \"start_offset_sec\": None,\n",
+ " \"end_offset_sec\": None,\n",
+ " }\n",
+ " results.append(result)\n",
+ "\n",
+ " # Convert results to a DataFrame\n",
+ " df = pd.DataFrame(results)\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ySanRKGLpAjB",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 363
+ },
+ "id": "ySanRKGLpAjB",
+ "outputId": "b3b37f71-7c8c-4946-b431-53b14c97f933"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " index | \n",
+ " similarity_score | \n",
+ " start_offset_sec | \n",
+ " end_offset_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 212 | \n",
+ " 0.435371 | \n",
+ " 1272.0 | \n",
+ " 1278.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 230 | \n",
+ " 0.418007 | \n",
+ " 1380.0 | \n",
+ " 1386.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 210 | \n",
+ " 0.411242 | \n",
+ " 1260.0 | \n",
+ " 1266.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 211 | \n",
+ " 0.409344 | \n",
+ " 1266.0 | \n",
+ " 1272.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 208 | \n",
+ " 0.408644 | \n",
+ " 1248.0 | \n",
+ " 1254.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 231 | \n",
+ " 0.406000 | \n",
+ " 1386.0 | \n",
+ " 1392.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 209 | \n",
+ " 0.404767 | \n",
+ " 1254.0 | \n",
+ " 1260.0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 229 | \n",
+ " 0.403729 | \n",
+ " 1374.0 | \n",
+ " 1380.0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 203 | \n",
+ " 0.403292 | \n",
+ " 1218.0 | \n",
+ " 1224.0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 207 | \n",
+ " 0.391671 | \n",
+ " 1242.0 | \n",
+ " 1248.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " index similarity_score start_offset_sec end_offset_sec\n",
+ "0 212 0.435371 1272.0 1278.0\n",
+ "1 230 0.418007 1380.0 1386.0\n",
+ "2 210 0.411242 1260.0 1266.0\n",
+ "3 211 0.409344 1266.0 1272.0\n",
+ "4 208 0.408644 1248.0 1254.0\n",
+ "5 231 0.406000 1386.0 1392.0\n",
+ "6 209 0.404767 1254.0 1260.0\n",
+ "7 229 0.403729 1374.0 1380.0\n",
+ "8 203 0.403292 1218.0 1224.0\n",
+ "9 207 0.391671 1242.0 1248.0"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_result = get_top_n_similarity_matches(response.hits[0], N=10)\n",
+ "df_result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "sYoc3zy3pNqV",
+ "metadata": {
+ "id": "sYoc3zy3pNqV"
+ },
+ "source": [
+ "## 5. Review results (Optional)\n",
+ "\n",
+ "We can review the results by spinning up a video player in the notebook, checking the identified segments, and judging the matches for ourselves.\n",
+ "\n",
+ "But first, we need to merge the contiguous segments, pad each consolidated segment by 3 seconds on either side, and convert the offsets to MM:SS so we can quickly find the segments to watch in the player. Let's write a function that takes the top-N DataFrame as input and returns the consolidated segments to view in the player."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "244lqkpvymGH",
+ "metadata": {
+ "id": "244lqkpvymGH"
+ },
+ "outputs": [],
+ "source": [
+ "def concatenate_contiguous_segments(df):\n",
+ " \"\"\"\n",
+ " Function to concatenate contiguous segments based on their start and end offsets.\n",
+ " Converts the concatenated segments to MM:SS format.\n",
+ "\n",
+ " Args:\n",
+ " - df (pd.DataFrame): DataFrame with columns 'start_offset_sec' and 'end_offset_sec'.\n",
+ "\n",
+ " Returns:\n",
+ " - List of tuples with concatenated segments in MM:SS format as (start_time, end_time).\n",
+ " \"\"\"\n",
+ " if df.empty:\n",
+ " return []\n",
+ "\n",
+ " # Sort by start_offset_sec for ordered processing\n",
+ " df = df.sort_values(by=\"start_offset_sec\").reset_index(drop=True)\n",
+ "\n",
+ " # Initialize the list to hold concatenated segments\n",
+ " concatenated_segments = []\n",
+ "\n",
+ " # Initialize the first segment\n",
+ " start = df.iloc[0][\"start_offset_sec\"]\n",
+ " end = df.iloc[0][\"end_offset_sec\"]\n",
+ "\n",
+ " for i in range(1, len(df)):\n",
+ " current_start = df.iloc[i][\"start_offset_sec\"]\n",
+ " current_end = df.iloc[i][\"end_offset_sec\"]\n",
+ "\n",
+ " # Check if the current segment is contiguous with the previous one\n",
+ " if current_start <= end:\n",
+ " # Extend the segment if it is contiguous\n",
+ " end = max(end, current_end)\n",
+ " else:\n",
+ " # Add the previous segment to the result list in MM:SS format\n",
+ " concatenated_segments.append(\n",
+ " (convert_seconds_to_mmss(start - 3), convert_seconds_to_mmss(end + 3))\n",
+ " )\n",
+ " # Start a new segment\n",
+ " start = current_start\n",
+ " end = current_end\n",
+ "\n",
+ " # Add the final segment\n",
+ " concatenated_segments.append(\n",
+ " (convert_seconds_to_mmss(start - 3), convert_seconds_to_mmss(end + 3))\n",
+ " )\n",
+ "\n",
+ " return concatenated_segments\n",
+ "\n",
+ "\n",
+ "def convert_seconds_to_mmss(seconds):\n",
+ " \"\"\"\n",
+ " Converts seconds to MM:SS format.\n",
+ "\n",
+ " Args:\n",
+ " - seconds (float): Time in seconds.\n",
+ "\n",
+ " Returns:\n",
+ " - str: Time in MM:SS format.\n",
+ " \"\"\"\n",
+ " minutes = int(seconds // 60)\n",
+ " seconds = int(seconds % 60)\n",
+ " return f\"{minutes:02}:{seconds:02}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "azg1FfNCzEpV",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "azg1FfNCzEpV",
+ "outputId": "ca4ce212-e6d6-4804-dab1-0ae7d1ab2f75"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('20:15', '20:27'), ('20:39', '21:21'), ('22:51', '23:15')]"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "segments = concatenate_contiguous_segments(df_result)\n",
+ "segments"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a86246d4",
+ "metadata": {
+ "id": "a86246d4",
+ "outputId": "54b050c3-4c62-4356-a3f2-78c02410bc19"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Middle point in seconds: 1221\n"
+ ]
+ }
+ ],
+ "source": [
+ "def time_to_seconds(time_str):\n",
+ " minutes, seconds = map(int, time_str.split(\":\"))\n",
+ " return minutes * 60 + seconds\n",
+ "\n",
+ "\n",
+ "# Convert start and end times of the first segment to seconds\n",
+ "start_sec = time_to_seconds(segments[0][0])\n",
+ "end_sec = time_to_seconds(segments[0][1])\n",
+ "\n",
+ "# Calculate the middle point\n",
+ "middle_point = (start_sec + end_sec) // 2 # Use integer division for an exact second\n",
+ "\n",
+ "print(\"Middle point in seconds:\", middle_point)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "XKK-tQMGp57L",
+ "metadata": {
+ "id": "XKK-tQMGp57L"
+ },
+ "source": [
+ "We can now spin up the player and review the segments of interest.\n",
+ "The video player is set to start at the midpoint of the first segment."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "brwDc367FHzX",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 501
+ },
+ "id": "brwDc367FHzX",
+ "outputId": "a6b5a8d5-c696-4a38-f09c-9bcd31eb22a6"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from IPython.display import HTML\n",
+ "\n",
+ "video_url = \"https://ia601401.us.archive.org/1/items/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net/twas-the-night-before-christmas-1974-full-movie-freedownloadvideo.net.mp4\"\n",
+ "\n",
+ "video_player = f\"\"\"\n",
+ "\n",
+ "\n",
+ "\n",
+ "\"\"\"\n",
+ "\n",
+ "HTML(video_player)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cf10c4db",
+ "metadata": {
+ "id": "cf10c4db"
+ },
+ "source": [
+ "## 6. Clean-up\n",
+ "\n",
+ "The following will delete the application and data from the dev environment."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9d44767",
+ "metadata": {
+ "id": "a9d44767"
+ },
+ "outputs": [],
+ "source": [
+ "vespa_cloud.delete()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "B4zTc3eApSFy",
+ "metadata": {
+ "id": "B4zTc3eApSFy"
+ },
+ "source": [
+ "The following will delete the index created earlier, to which the videos were uploaded:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7BAlC6R8pZD1",
+ "metadata": {
+ "id": "7BAlC6R8pZD1"
+ },
+ "outputs": [],
+ "source": [
+ "# Creating a client\n",
+ "client = TwelveLabs(api_key=TL_API_KEY)\n",
+ "\n",
+ "client.index.delete(index_id)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.14"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}