From b4edb4666d1ae657e355b033f83a4337c419d7f2 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 10:34:09 -0300 Subject: [PATCH 01/21] feat(module): add genai example --- .../multimodal_rag_langchain.ipynb | 1073 +++++++++++++++++ .../multimodal_rag_langchain_infra.tf | 94 ++ .../genai-rag-multimodal/terraform.tfvars | 5 + examples/genai-rag-multimodal/variables.tf | 60 + 4 files changed, 1232 insertions(+) create mode 100644 examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb create mode 100644 examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf create mode 100644 examples/genai-rag-multimodal/terraform.tfvars create mode 100644 examples/genai-rag-multimodal/variables.tf diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb new file mode 100644 index 00000000..21db4c43 --- /dev/null +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb @@ -0,0 +1,1073 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ijGzTHJJUCPY" + }, + "outputs": [], + "source": [ + "# Copyright 2024 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NDsTUvKjwHBW" + }, + "source": [ + "# Multimodal Retrieval Augmented Generation (RAG) with Gemini, Vertex AI Vector Search, and LangChain\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \"Google
Run in Colab Enterprise\n", + "
\n", + "
\n", + " \n", + " \"Google
Run in Colab\n", + "
\n", + "
\n", + " \n", + " \"Vertex
Open in Vertex AI Workbench\n", + "
\n", + "
\n", + " \n", + " \"GitHub
View on GitHub\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| | | \n", + "|-|-|\n", + "|Author(s) | [Holt Skinner](https://github.com/holtskinner) |" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VK1Q5ZYdVL4Y" + }, + "source": [ + "## Overview\n", + "\n", + "Retrieval augmented generation (RAG) has become a popular paradigm for enabling LLMs to access external data and also as a mechanism for grounding to mitigate against hallucinations.\n", + "\n", + "In this notebook, you will learn how to perform multimodal RAG where you will perform Q&A over a financial document filled with both text and images.\n", + "\n", + "### Gemini\n", + "\n", + "Gemini is a family of generative AI models developed by Google DeepMind that is designed for multimodal use cases. The Gemini API gives you access to the Gemini 1.0 Pro Vision and Gemini 1.0 Pro models.\n", + "\n", + "### Comparing text-based and multimodal RAG\n", + "\n", + "Multimodal RAG offers several advantages over text-based RAG:\n", + "\n", + "1. **Enhanced knowledge access:** Multimodal RAG can access and process both textual and visual information, providing a richer and more comprehensive knowledge base for the LLM.\n", + "2. **Improved reasoning capabilities:** By incorporating visual cues, multimodal RAG can make better informed inferences across different types of data modalities.\n", + "\n", + "This notebook shows you how to use RAG with Vertex AI Gemini API, and [multimodal embeddings](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/multimodal-embeddings), to build a document search engine.\n", + "\n", + "Through hands-on examples, you will discover how to construct a multimedia-rich metadata repository of your document sources, enabling search, comparison, and reasoning across diverse information streams." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RQT500QqVPIb" + }, + "source": [ + "### Objectives\n", + "\n", + "This notebook provides a guide to building a document search engine using multimodal retrieval augmented generation (RAG), step by step:\n", + "\n", + "1. Extract and store metadata of documents containing both text and images, and generate embeddings for the documents\n", + "2. Search the metadata with text queries to find similar text or images\n", + "3. Search the metadata with image queries to find similar images\n", + "4. Using a text query as input, search for contextual answers using both text and images" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KnpYxfesh2rI" + }, + "source": [ + "### Costs\n", + "\n", + "This tutorial uses billable components of Google Cloud:\n", + "\n", + "- Vertex AI\n", + "\n", + "Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DXJpXzKrh2rJ" + }, + "source": [ + "## Getting Started\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "N5afkyDMSBW5" + }, + "source": [ + "### Install Vertex AI SDK for Python and other dependencies\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 863 + }, + "executionInfo": { + "elapsed": 23606, + "status": "ok", + "timestamp": 1707899661283, + "user": { + "displayName": "", + "userId": "" + }, + "user_tz": -330 + }, + "id": "kc4WxYmLSBW5", + "outputId": "41191d43-b2b3-4bfd-c5e5-c0a53f03d1e2", + "tags": [] + }, + "outputs": [], + "source": [ + "%pip install -U -q google-cloud-aiplatform langchain-core langchain-google-vertexai==1.0.4 langchain-text-splitters langchain-experimental \"unstructured[all-docs]\" pypdf 
pydantic lxml pillow matplotlib opencv-python tiktoken" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%conda install -c conda-forge poppler tesseract -y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Private Endpoint IP Address\n", + "\n", + "Retrieve the IP address value created when setting up Private Service Connect for Vector Search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PRIVATE_ENDPOINT_IP_ADDRESS=" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Update Project Allowlist\n", + "\n", + "Insert the values of the Host Shared VPC Project and the current notebook project in the cell below.\n", + "\n", + "For example: `PROJECT_ALLOWLIST=[\"prj-d-shared-restricted-83dn\",\"prj-d-ml-machine-learning-0v09\"]` \n", + "\n", + "Remember that these values will be different in your environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PROJECT_ALLOWLIST=[\"\", \"\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5Xep4W9lq-Z" + }, + "source": [ + "### Restart current runtime\n", + "\n", + "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 646, + "status": "ok", + "timestamp": 1707913872852, + "user": { + "displayName": "", + "userId": "" + }, + "user_tz": -330 + }, + "id": "XRvKdaPDTznN", + "outputId": "759357fb-7531-4423-a75f-b896af19ce37", + "tags": [] + }, + "outputs": [], + "source": [ + "# Restart kernel after installs so that your environment can access the new packages\n", + "import IPython\n", + "\n", + "app = IPython.Application.instance()\n", + "app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SbmM4z7FOBpM" + }, + "source": [ + "
\n", + "⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️\n", + "
\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O1vKZZoEh2rL" + }, + "source": [ + "### Define Google Cloud project information" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 673, + "status": "ok", + "timestamp": 1707913890887, + "user": { + "displayName": "", + "userId": "" + }, + "user_tz": -330 + }, + "id": "gJqZ76rJh2rM", + "outputId": "ed164f41-97f3-411d-8cba-d853d71957b9", + "tags": [] + }, + "outputs": [], + "source": [ + "PROJECT_ID = \"\" # @param {type:\"string\"}\n", + "LOCATION = \"us-central1\" # @param {type:\"string\"}\n", + "\n", + "# For Vector Search Staging\n", + "GCS_BUCKET = \"\" # @param {type:\"string\"}\n", + "GCS_BUCKET_URI = f\"gs://{GCS_BUCKET}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize the Vertex AI SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D48gUW5-h2rM", + "tags": [] + }, + "outputs": [], + "source": [ + "from google.cloud import aiplatform\n", + "\n", + "aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=GCS_BUCKET_URI)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BuQwwRiniVFG" + }, + "source": [ + "### Import libraries\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "executionInfo": { + "elapsed": 647, + "status": "ok", + "timestamp": 1707913944466, + "user": { + "displayName": "", + "userId": "" + }, + "user_tz": -330 + }, + "id": "rtMowvm-yQ97", + "tags": [] + }, + "outputs": [], + "source": [ + "import base64\n", + "import os\n", + "import uuid\n", + "import re\n", + "\n", + "from typing import List, Tuple\n", + "\n", + "from IPython.display import display, Image, Markdown\n", + "\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.retrievers.multi_vector import 
MultiVectorRetriever\n", + "from langchain.storage import InMemoryStore\n", + "\n", + "from langchain_community.vectorstores import Chroma\n", + "\n", + "from langchain_core.documents import Document\n", + "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n", + "from langchain_core.messages import AIMessage, HumanMessage\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "\n", + "from langchain_text_splitters import CharacterTextSplitter\n", + "\n", + "from langchain_google_vertexai import (\n", + " VertexAI,\n", + " ChatVertexAI,\n", + " VertexAIEmbeddings,\n", + " VectorSearchVectorStore,\n", + ")\n", + "\n", + "from unstructured.partition.pdf import partition_pdf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Loading" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g7bKCQMFT7JT" + }, + "source": [ + "#### Get documents and images from GCS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 1757, + "status": "ok", + "timestamp": 1707913903524, + "user": { + "displayName": "", + "userId": "" + }, + "user_tz": -330 + }, + "id": "KwbL89zcY39N", + "outputId": "baa3a478-4d55-4b9c-e02f-6816a1d589b1", + "tags": [] + }, + "outputs": [], + "source": [ + "# Download documents and images used in this notebook\n", + "!gsutil -m rsync -r gs://github-repo/rag/intro_multimodal_rag/ .\n", + "print(\"Download completed\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ps1G-cCfpibN" + }, + "source": [ + "## Partition PDF tables, text, and images" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jqLsy3iZ5t-R" + }, + "source": [ + "### The data\n", + "\n", + "The source data that you will use in this notebook is a modified version of [Google-10K](https://abc.xyz/assets/investor/static/pdf/20220202_alphabet_10K.pdf) which provides a 
comprehensive overview of the company's financial performance, business operations, management, and risk factors. As the original document is rather large, you will be using [a modified version with only 14 pages](https://storage.googleapis.com/github-repo/rag/multimodal_rag_langchain/google-10k-sample-14pages.pdf) instead. Although it's truncated, the sample document still contains text along with images such as tables, charts, and graphs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pdf_folder_path = \"data/\"\n", + "pdf_file_name = \"intro_multimodal_rag_old_version/data/google-10k-sample-part1.pdf\"\n", + "\n", + "# Extract images, tables, and chunk text from a PDF file.\n", + "raw_pdf_elements = partition_pdf(\n", + " filename=pdf_file_name,\n", + " extract_images_in_pdf=False,\n", + " infer_table_structure=True,\n", + " chunking_strategy=\"by_title\",\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " image_output_dir_path=pdf_folder_path,\n", + ")\n", + "\n", + "# Categorize extracted elements from a PDF into tables and texts.\n", + "tables = []\n", + "texts = []\n", + "for element in raw_pdf_elements:\n", + " if \"unstructured.documents.elements.Table\" in str(type(element)):\n", + " tables.append(str(element))\n", + " elif \"unstructured.documents.elements.CompositeElement\" in str(type(element)):\n", + " texts.append(str(element))\n", + "\n", + "# Optional: Enforce a specific token size for texts\n", + "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n", + " chunk_size=10000, chunk_overlap=0\n", + ")\n", + "joined_texts = \" \".join(texts)\n", + "texts_4k_token = text_splitter.split_text(joined_texts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_NAME = \"gemini-1.0-pro-vision\"\n", + "\n", + "\n", + "# Generate summaries of text 
elements\n", + "def generate_text_summaries(\n", + " texts: List[str], tables: List[str], summarize_texts: bool = False\n", + ") -> Tuple[List, List]:\n", + " \"\"\"\n", + " Summarize text elements\n", + " texts: List of str\n", + " tables: List of str\n", + " summarize_texts: Bool to summarize texts\n", + " \"\"\"\n", + "\n", + " # Prompt\n", + " prompt_text = \"\"\"You are an assistant tasked with summarizing tables and text for retrieval. \\\n", + " These summaries will be embedded and used to retrieve the raw text or table elements. \\\n", + " Give a concise summary of the table or text that is well optimized for retrieval. Table or text: {element} \"\"\"\n", + " prompt = PromptTemplate.from_template(prompt_text)\n", + " empty_response = RunnableLambda(\n", + " lambda x: AIMessage(content=\"Error processing document\")\n", + " )\n", + " # Text summary chain\n", + " model = VertexAI(\n", + " temperature=0, model_name=MODEL_NAME, max_output_tokens=1024\n", + " ).with_fallbacks([empty_response])\n", + " summarize_chain = {\"element\": lambda x: x} | prompt | model | StrOutputParser()\n", + "\n", + " # Initialize empty summaries\n", + " text_summaries = []\n", + " table_summaries = []\n", + "\n", + " # Apply to text if texts are provided and summarization is requested\n", + " if texts:\n", + " if summarize_texts:\n", + " text_summaries = summarize_chain.batch(texts, {\"max_concurrency\": 1})\n", + " else:\n", + " text_summaries = texts\n", + "\n", + " # Apply to tables if tables are provided\n", + " if tables:\n", + " table_summaries = summarize_chain.batch(tables, {\"max_concurrency\": 1})\n", + "\n", + " return text_summaries, table_summaries\n", + "\n", + "\n", + "# Get text, table summaries\n", + "text_summaries, table_summaries = generate_text_summaries(\n", + " texts_4k_token, tables, summarize_texts=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def 
encode_image(image_path):\n", + " \"\"\"Getting the base64 string\"\"\"\n", + " with open(image_path, \"rb\") as image_file:\n", + " return base64.b64encode(image_file.read()).decode(\"utf-8\")\n", + "\n", + "\n", + "def image_summarize(img_base64, prompt):\n", + " \"\"\"Make image summary\"\"\"\n", + " model = ChatVertexAI(model_name=\"gemini-pro-vision\", max_output_tokens=1024)\n", + "\n", + " msg = model(\n", + " [\n", + " HumanMessage(\n", + " content=[\n", + " {\"type\": \"text\", \"text\": prompt},\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\"url\": f\"data:image/png;base64,{img_base64}\"},\n", + " },\n", + " ]\n", + " )\n", + " ]\n", + " )\n", + " return msg.content\n", + "\n", + "\n", + "def generate_img_summaries(path):\n", + " \"\"\"\n", + " Generate summaries and base64 encoded strings for images\n", + " path: Path to list of .jpg files extracted by Unstructured\n", + " \"\"\"\n", + "\n", + " # Store base64 encoded images\n", + " img_base64_list = []\n", + "\n", + " # Store image summaries\n", + " image_summaries = []\n", + "\n", + " # Prompt\n", + " prompt = \"\"\"You are an assistant tasked with summarizing images for retrieval. \\\n", + " These summaries will be embedded and used to retrieve the raw image. 
\\\n", + " Give a concise summary of the image that is well optimized for retrieval.\n", + " If it's a table, extract all elements of the table.\n", + " If it's a graph, explain the findings in the graph.\n", + " Do not include any numbers that are not mentioned in the image.\n", + " \"\"\"\n", + "\n", + " # Apply to images\n", + " for img_file in sorted(os.listdir(path)):\n", + " if img_file.endswith(\".png\"):\n", + " img_path = os.path.join(path, img_file)\n", + " base64_image = encode_image(img_path)\n", + " img_base64_list.append(base64_image)\n", + " image_summaries.append(image_summarize(base64_image, prompt))\n", + "\n", + " return img_base64_list, image_summaries\n", + "\n", + "\n", + "# Image summaries\n", + "img_base64_list, image_summaries = generate_img_summaries(\".\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create & Deploy Vertex AI Vector Search Index & Endpoint\n", + "\n", + "Skip this step if you already have Vector Search set up.\n", + "\n", + "- https://console.cloud.google.com/vertex-ai/matching-engine/indexes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Create [`MatchingEngineIndex`](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndex)\n", + " - https://cloud.google.com/vertex-ai/docs/vector-search/create-manage-index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings\n", + "DIMENSIONS = 768 # Dimensions output from textembedding-gecko\n", + "\n", + "index = aiplatform.MatchingEngineIndex.create_tree_ah_index(\n", + " display_name=\"mm_rag_langchain_index\",\n", + " dimensions=DIMENSIONS,\n", + " approximate_neighbors_count=150,\n", + " leaf_node_embedding_count=500,\n", + " leaf_nodes_to_search_percent=7,\n", + " description=\"Multimodal RAG LangChain Index\",\n", + 
")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Create [`MatchingEngineIndexEndpoint`](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndexEndpoint)\n", + " - https://cloud.google.com/vertex-ai/docs/vector-search/deploy-index-public" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DEPLOYED_INDEX_ID = \"mm_rag_langchain_index_endpoint\"\n", + "\n", + "index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(\n", + " display_name=DEPLOYED_INDEX_ID,\n", + " description=\"Multimodal RAG LangChain Index Endpoint\",\n", + " enable_private_service_connect=True,\n", + " project_allowlist=PROJECT_ALLOWLIST,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Deploy Index to Index Endpoint\n", + " - NOTE: This will take a while to run.\n", + " - You can stop this cell after starting it instead of waiting for deployment.\n", + " - You can check the status at https://console.cloud.google.com/vertex-ai/matching-engine/indexes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index_endpoint = index_endpoint.deploy_index(\n", + " index=index, deployed_index_id=\"mm_rag_langchain_deployed_index\"\n", + ")\n", + "index_endpoint.deployed_indexes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* You will need to create the service attachment to the Private Service Connect IP" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "index_endpoint.deployed_indexes.private_endpoints.service_attachment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Retrieve the value of the service attachment and execute the command below on your local machine:\n", + "\n", + "```bash\n", + "NETWORK=\n", + "NETWORK_PROJECT_ID=\n", 
+ "SERVICE_ATTACHMENT=\n", + "\n", + "gcloud compute forwarding-rules create vector-search-endpoint \\\n", + " --network=$NETWORK \\\n", + " --address=vector-search-endpoint \\\n", + " --target-service-attachment=$SERVICE_ATTACHMENT \\\n", + " --project=$NETWORK_PROJECT_ID \\\n", + " --region=us-central1\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create retriever & load documents" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Create [`VectorSearchVectorStore`](https://api.python.langchain.com/en/latest/vectorstores/langchain_google_vertexai.vectorstores.vectorstores.VectorSearchVectorStore.html) with Vector Search Index ID and Endpoint ID.\n", + "- Use [`textembedding-gecko`](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings) as embedding model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The vectorstore to use to index the summaries\n", + "vectorstore = VectorSearchVectorStore.from_components(\n", + " project_id=PROJECT_ID,\n", + " region=LOCATION,\n", + " gcs_bucket_name=GCS_BUCKET,\n", + " index_id=index.name,\n", + " endpoint_id=index_endpoint.name,\n", + " embedding=VertexAIEmbeddings(model_name=\"textembedding-gecko@003\"),\n", + " private_service_connect_ip_address=PRIVATE_ENDPOINT_IP_ADDRESS,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Create Multi-Vector Retriever using the vector store you created.\n", + "- Since vector stores only contain the embedding and an ID, you'll also need to create a document store indexed by ID to get the original source documents after searching for embeddings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docstore = InMemoryStore()\n", + "\n", + "id_key = \"doc_id\"\n", + "# Create the multi-vector retriever\n", + "retriever_multi_vector_img = MultiVectorRetriever(\n", + " vectorstore=vectorstore,\n", + " docstore=docstore,\n", + " id_key=id_key,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Set Private Service Connect address" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retriever_multi_vector_img.vectorstore._searcher._endpoint.private_service_connect_ip_address = PRIVATE_ENDPOINT_IP_ADDRESS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Load data into Document Store and Vector Store" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Raw Document Contents\n", + "doc_contents = texts + tables + img_base64_list\n", + "\n", + "doc_ids = [str(uuid.uuid4()) for _ in doc_contents]\n", + "summary_docs = [\n", + " Document(page_content=s, metadata={id_key: doc_ids[i]})\n", + " for i, s in enumerate(text_summaries + table_summaries + image_summaries)\n", + "]\n", + "\n", + "retriever_multi_vector_img.docstore.mset(list(zip(doc_ids, doc_contents)))\n", + "\n", + "# If using Vertex AI Vector Search, this will take a while to complete.\n", + "# You can cancel this cell and continue later.\n", + "retriever_multi_vector_img.vectorstore.add_documents(summary_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Chain with Retriever and Gemini LLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def looks_like_base64(sb):\n", + " \"\"\"Check if the string looks like base64\"\"\"\n", + " return re.match(\"^[A-Za-z0-9+/]+[=]{0,2}$\", sb) is not None\n", + "\n", 
+ "\n", + "def is_image_data(b64data):\n", + " \"\"\"\n", + " Check if the base64 data is an image by looking at the start of the data\n", + " \"\"\"\n", + " image_signatures = {\n", + " b\"\\xFF\\xD8\\xFF\": \"jpg\",\n", + " b\"\\x89\\x50\\x4E\\x47\\x0D\\x0A\\x1A\\x0A\": \"png\",\n", + " b\"\\x47\\x49\\x46\\x38\": \"gif\",\n", + " b\"\\x52\\x49\\x46\\x46\": \"webp\",\n", + " }\n", + " try:\n", + " header = base64.b64decode(b64data)[:8] # Decode and get the first 8 bytes\n", + " for sig, format in image_signatures.items():\n", + " if header.startswith(sig):\n", + " return True\n", + " return False\n", + " except Exception:\n", + " return False\n", + "\n", + "\n", + "def split_image_text_types(docs):\n", + " \"\"\"\n", + " Split base64-encoded images and texts\n", + " \"\"\"\n", + " b64_images = []\n", + " texts = []\n", + " for doc in docs:\n", + " # Check if the document is of type Document and extract page_content if so\n", + " if isinstance(doc, Document):\n", + " doc = doc.page_content\n", + " if looks_like_base64(doc) and is_image_data(doc):\n", + " b64_images.append(doc)\n", + " else:\n", + " texts.append(doc)\n", + " return {\"images\": b64_images, \"texts\": texts}\n", + "\n", + "\n", + "def img_prompt_func(data_dict):\n", + " \"\"\"\n", + " Join the context into a single string\n", + " \"\"\"\n", + " formatted_texts = \"\\n\".join(data_dict[\"context\"][\"texts\"])\n", + " messages = [\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": (\n", + " \"You are financial analyst tasking with providing investment advice.\\n\"\n", + " \"You will be given a mix of text, tables, and image(s) usually of charts or graphs.\\n\"\n", + " \"Use this information to provide investment advice related to the user's question. 
\\n\"\n", + " f\"User-provided question: {data_dict['question']}\\n\\n\"\n", + " \"Text and / or tables:\\n\"\n", + " f\"{formatted_texts}\"\n", + " ),\n", + " }\n", + " ]\n", + "\n", + " # Adding image(s) to the messages if present\n", + " if data_dict[\"context\"][\"images\"]:\n", + " for image in data_dict[\"context\"][\"images\"]:\n", + " messages.append(\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image}\"},\n", + " }\n", + " )\n", + " return [HumanMessage(content=messages)]\n", + "\n", + "\n", + "# Create RAG chain\n", + "chain_multimodal_rag = (\n", + " {\n", + " \"context\": retriever_multi_vector_img | RunnableLambda(split_image_text_types),\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", + " | RunnableLambda(img_prompt_func)\n", + " | ChatVertexAI(\n", + " temperature=0, model_name=\"gemini-pro-vision\", max_output_tokens=1024\n", + " ) # Multi-modal LLM\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Process user query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"\"\"\n", + " - What are the critical difference between various graphs for Class A Share?\n", + " - Which index best matches Class A share performance closely where Google is not already a part? Explain the reasoning.\n", + " - Identify key chart patterns for Google Class A shares.\n", + " - What is cost of revenues, operating expenses and net income for 2020. 
Do mention the percentage change\n", + " - What was the effect of Covid in the 2020 financial year?\n", + " - What are the total revenues for APAC and USA for 2021?\n", + " - What is deferred income taxes?\n", + " - How do you compute net income per share?\n", + " - What drove percentage change in the consolidated revenue and cost of revenue for the year 2021 and was there any effect of Covid?\n", + " - What is the cause of 41% increase in revenue from 2020 to 2021 and how much is dollar change?\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get Retrieved documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List of source documents\n", + "docs = retriever_multi_vector_img.get_relevant_documents(query, limit=10)\n", + "\n", + "source_docs = split_image_text_types(docs)\n", + "\n", + "print(source_docs[\"texts\"])\n", + "\n", + "for i in source_docs[\"images\"]:\n", + " display(Image(base64.b64decode(i)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get generative response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "result = chain_multimodal_rag.invoke(query)\n", + "\n", + "Markdown(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KwNrHCqbi3xi" + }, + "source": [ + "## Conclusions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "05jynhZnkgxn" + }, + "source": [ + "Congratulations on making it through this multimodal RAG notebook!\n", + "\n", + "While multimodal RAG can be quite powerful, note that it can face some limitations:\n", + "\n", + "* **Data dependency:** Needs high-accuracy data from the text and visuals.\n", + "* **Computationally demanding:** Generating embeddings from multimodal data is resource-intensive.\n", + "* **Domain specific:** Models trained on general data may not shine in 
specialized fields like medicine.\n", + "* **Black box:** Understanding how these models work can be tricky, hindering trust and adoption.\n", + "\n", + "\n", + "Despite these challenges, multimodal RAG represents a significant step towards search and retrieval systems that can handle diverse, multimodal data." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "environment": { + "kernel": "python3", + "name": "common-cpu.m116", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/base-cpu:m116" + }, + "kernelspec": { + "display_name": "Python 3 (Local)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf new file mode 100644 index 00000000..20a083f9 --- /dev/null +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf @@ -0,0 +1,94 @@ + + +resource "google_workbench_instance" "instance" { + disable_proxy_access = false + instance_owners = [] + location = var.instance_location + name = var.machine_name + project = var.machine_learning_project + + gce_setup { + service_accounts { + email = google_service_account.notebook_runner.email + } + disable_public_ip = true + machine_type = var.machine_type + metadata = { + "disable-mixer" = "false" + "notebook-disable-downloads" = "true" + "notebook-disable-root" = "true" + "notebook-disable-terminal" = "true" + "notebook-upgrade-schedule" = "00 19 * * MON" + "report-dns-resolution" = "true" + "report-event-health" = "true" + "terraform" = "true" + } + tags = [ + "egress-internet", + ] + boot_disk { + disk_encryption = "CMEK" + disk_size_gb = 
"150" + disk_type = "PD_SSD" + kms_key = var.kms_key + } + data_disks { + disk_encryption = "CMEK" + disk_size_gb = "150" + disk_type = "PD_SSD" + kms_key = var.kms_key + } + network_interfaces { + network = var.network + subnet = var.subnet + } + vm_image { + family = "workbench-instances" + project = "cloud-notebooks-managed" + } + } +} + +resource "random_string" "suffix" { + length = 4 + special = false + upper = false +} + +resource "google_storage_bucket" "vector_search_bucket" { + name = "vector-search-${random_string.suffix.result}" + location = "US" + project = var.machine_learning_project + uniform_bucket_level_access = true +} + +resource "google_compute_address" "vector_search_static_ip" { + name = var.vector_search_address_name + region = var.vector_search_ip_region + subnetwork = var.subnet + project = var.vector_search_vpc_project + address_type = "INTERNAL" +} + +resource "google_service_account" "notebook_runner" { + account_id = var.service_account_name + display_name = "RAG Notebook Runner Service Account" + project = var.machine_learning_project +} + +# IAM Roles + +resource "google_project_iam_member" "notebook_runner_roles" { + for_each = toset([ + "roles/aiplatform.user" + ]) + project = var.machine_learning_project + role = each.key + member = google_service_account.notebook_runner.member +} + +resource "google_storage_bucket_iam_member" "notebook_runner_bucket_admin" { + bucket = google_storage_bucket.vector_search_bucket.name + role = "roles/storage.admin" + member = google_service_account.notebook_runner.member +} diff --git a/examples/genai-rag-multimodal/terraform.tfvars b/examples/genai-rag-multimodal/terraform.tfvars new file mode 100644 index 00000000..34944ca3 --- /dev/null +++ b/examples/genai-rag-multimodal/terraform.tfvars @@ -0,0 +1,5 @@ +kms_key = +network = +subnet = +machine_learning_project = +vector_search_vpc_project = diff --git a/examples/genai-rag-multimodal/variables.tf b/examples/genai-rag-multimodal/variables.tf new 
file mode 100644 index 00000000..f77180e4 --- /dev/null +++ b/examples/genai-rag-multimodal/variables.tf @@ -0,0 +1,60 @@ +variable "service_account_name" { + description = "The name of the service account" + type = string + default = "rag-notebook-runner" +} + +variable "machine_learning_project" { + description = "Machine Learning Project ID" + type = string +} + +variable "vector_search_address_name" { + description = "The name of the address to create" + type = string + default = "vector-search-endpoint" +} + +variable "vector_search_ip_region" { + description = "The region to create the address in" + type = string + default = "us-central1" +} + +variable "vector_search_vpc_project" { + description = "The project ID where the Host VPC network is located" + type = string +} + +variable "kms_key" { + description = "The KMS key to use for disk encryption" + type = string +} + +variable "network" { + description = "The Host VPC network ID to connect the instance to" + type = string +} + +variable "subnet" { + description = "The subnet ID within the Host VPC network to use in Vertex Workbench and Private Service Connect" + type = string +} + +variable "machine_type" { + description = "The type of machine to use for the instance" + type = string + default = "e2-standard-2" +} + +variable "machine_name" { + description = "The name of the machine instance" + type = string + default = "rag-notebook-instance" +} + +variable "instance_location" { + description = "Vertex Workbench Instance Location" + type = string + default = "us-central1-a" +} From 4167f0d36e9eada3551755cf26d0c750439c9c86 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 10:34:54 -0300 Subject: [PATCH 02/21] feat: add README.md --- examples/genai-rag-multimodal/README.md | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 examples/genai-rag-multimodal/README.md diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md 
new file mode 100644 index 00000000..fd843d31 --- /dev/null +++ b/examples/genai-rag-multimodal/README.md @@ -0,0 +1,41 @@ +# Multimodal RAG Langhain Example + +## Overview + +Retrieval augmented generation (RAG) has become a popular paradigm for enabling LLMs to access external data and also as a mechanism for grounding to mitigate against hallucinations. + +In this notebook, you will perform multimodal RAG by performing Q&A over a financial document filled with both text and images. + +This example is an adapted version of the sample Generative AI notebook from the Google Cloud codebase. You can find the original example and other notebooks in the following repository: [Google Cloud Platform Generative AI](https://github.com/GoogleCloudPlatform/generative-ai/tree/main). + +The main modifications to the original example include: + +- Adaptations to comply with Cloud Foundation Toolkit security measures. +- Installation of additional libraries in the Conda environment. +- Use of Vertex AI Workbench to run the notebook with a custom Service Account. +- Implementation of Vector Search on Vertex AI with [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). + +## Requirements + +- Terraform +- Authenticated Google Cloud SDK + +### Provision Infrastructure with Terraform + +- Update the `terraform.tfvars` file with values from your environment. + - The code below is an example using the Development environment host VPC network, the env-level kms key for the machine learning project and the machine learning project. 
+ + ```terraform + kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" + network = "projects/prj-d-shared-restricted-83dn/global/networks/vpc-d-shared-restricted" + subnet = "projects/prj-d-shared-restricted-83dn/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1" + machine_learning_project = "prj-d-ml-machine-learning-0v09" + vector_search_vpc_project = "prj-d-shared-restricted-83dn" + ``` + +## Usage + +Once all the requirements are set up, you can begin by running and adjusting the notebook step-by-step. + +To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open jupyterlab and upload the notebook to it. + From d0d3a7c77634176be0b549a83065e96f3ceca65d Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 10:35:40 -0300 Subject: [PATCH 03/21] edit README.md --- examples/genai-rag-multimodal/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index fd843d31..cd8952c0 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -37,5 +37,5 @@ The main modifications to the original example include: Once all the requirements are set up, you can begin by running and adjusting the notebook step-by-step. -To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open jupyterlab and upload the notebook to it. +To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open jupyterlab and upload the notebook (`multimodal_rag_langchain.ipynb`) to it. 
From e87a79079956b4700bcedd922cdd5f32ff41ffa0 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 10:41:59 -0300 Subject: [PATCH 04/21] add known issues --- examples/genai-rag-multimodal/README.md | 4 ++++ examples/genai-rag-multimodal/terraform.tfvars | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index cd8952c0..dadd67c7 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -39,3 +39,7 @@ Once all the requirements are set up, you can begin by running and adjusting the To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open jupyterlab and upload the notebook (`multimodal_rag_langchain.ipynb`) to it. +## Known Issues + +- Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found. + - When creating the VertexAI Workbench Instance through terraform you might face this issue. 
The issue is being tracked on this link: https://github.com/hashicorp/terraform-provider-google/issues/17904 diff --git a/examples/genai-rag-multimodal/terraform.tfvars b/examples/genai-rag-multimodal/terraform.tfvars index 34944ca3..7b5ce4a1 100644 --- a/examples/genai-rag-multimodal/terraform.tfvars +++ b/examples/genai-rag-multimodal/terraform.tfvars @@ -1,5 +1,5 @@ -kms_key = -network = -subnet = -machine_learning_project = -vector_search_vpc_project = +kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" +network = "projects/prj-d-shared-restricted-83dn/global/networks/vpc-d-shared-restricted" +subnet = "projects/prj-d-shared-restricted-83dn/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1" +machine_learning_project = "prj-d-ml-machine-learning-0v09" +vector_search_vpc_project = "prj-d-shared-restricted-83dn" From b61df64178d20d54723a10c2a2db8721e8f3b1c6 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 10:43:47 -0300 Subject: [PATCH 05/21] add fix --- examples/genai-rag-multimodal/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index dadd67c7..ed7767aa 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -43,3 +43,4 @@ To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open - Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found. - When creating the VertexAI Workbench Instance through terraform you might face this issue. The issue is being tracked on this link: https://github.com/hashicorp/terraform-provider-google/issues/17904 + - If you face this issue you will not be able to use terraform to create the instance, therefore, you will need to manually create it on Google Cloud Console using the same parameters. 
From a76ee2577c902fbb0d84205b6579e69655beee26 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 13:55:28 -0300 Subject: [PATCH 06/21] add service agent role assignment --- .../multimodal_rag_langchain_infra.tf | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf index 20a083f9..438a439d 100644 --- a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf @@ -1,4 +1,22 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +data "google_project" "ml_project" { + project_id = var.machine_learning_project +} resource "google_workbench_instance" "instance" { disable_proxy_access = false @@ -76,8 +94,6 @@ resource "google_service_account" "notebook_runner" { project = var.machine_learning_project } -# IAM Roles - resource "google_project_iam_member" "notebook_runner_roles" { for_each = toset([ "roles/aiplatform.user" @@ -92,3 +108,15 @@ resource "google_storage_bucket_iam_member" "notebook_runner_bucket_admin" { role = "roles/storage.admin" member = google_service_account.notebook_runner.member } + +# Service Agent Role Assignment - Allows creation of workbench instance when using var.kms_key + +resource "google_kms_crypto_key_iam_member" "service_agent_kms_key_binding" { + for_each = toset([ + "serviceAccount:service-${data.google_project.ml_project.number}@compute-system.iam.gserviceaccount.com", + "serviceAccount:service-${data.google_project.ml_project.number}@gcp-sa-notebooks.iam.gserviceaccount.com" + ]) + crypto_key_id = var.kms_key + role = "roles/cloudkms.cryptoKeyEncrypterDecrypter" + member = each.value +} From ba4461ae750d7a6614508a547fcfa33ab1303efb Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 13:56:14 -0300 Subject: [PATCH 07/21] update terraform.tfvars --- examples/genai-rag-multimodal/terraform.tfvars | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/genai-rag-multimodal/terraform.tfvars b/examples/genai-rag-multimodal/terraform.tfvars index 7b5ce4a1..34944ca3 100644 --- a/examples/genai-rag-multimodal/terraform.tfvars +++ b/examples/genai-rag-multimodal/terraform.tfvars @@ -1,5 +1,5 @@ -kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" -network = "projects/prj-d-shared-restricted-83dn/global/networks/vpc-d-shared-restricted" -subnet = "projects/prj-d-shared-restricted-83dn/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1" 
-machine_learning_project = "prj-d-ml-machine-learning-0v09" -vector_search_vpc_project = "prj-d-shared-restricted-83dn" +kms_key = +network = +subnet = +machine_learning_project = +vector_search_vpc_project = From 484dd84dccc8804dc4c129d4f267e3b7800f77e8 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 4 Jun 2024 17:17:17 -0300 Subject: [PATCH 08/21] Update README with VPC-SC Instruction --- examples/genai-rag-multimodal/README.md | 28 +++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index ed7767aa..6a6b6877 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -23,7 +23,7 @@ The main modifications to the original example include: ### Provision Infrastructure with Terraform - Update the `terraform.tfvars` file with values from your environment. - - The code below is an example using the Development environment host VPC network, the env-level kms key for the machine learning project and the machine learning project. + - The code below is an example using the Development environment host VPC network, the env-level kms key for the machine learning project and the machine learning project. ```terraform kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" @@ -33,6 +33,26 @@ The main modifications to the original example include: vector_search_vpc_project = "prj-d-shared-restricted-83dn" ``` +### Allow file download from Google Notebook Examples Bucket on VPC-SC Perimeter + +When running the Notebook, you will reach a step that downloads an example PDF file from a bucket, you need to add the egress rule below on the VPC-SC perimeter to allow the operation. 
+ + ```yaml + - egressFrom: + identities: + - serviceAccount:rag-notebook-runner@.iam.gserviceaccount.com + egressTo: + operations: + - methodSelectors: + - method: google.storage.buckets.list + - method: google.storage.buckets.get + - method: google.storage.objects.get + - method: google.storage.objects.list + serviceName: storage.googleapis.com + resources: + - projects/200612033880 # Google Cloud Example Project + ``` + ## Usage Once all the requirements are set up, you can begin by running and adjusting the notebook step-by-step. @@ -41,6 +61,6 @@ To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open ## Known Issues -- Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found. - - When creating the VertexAI Workbench Instance through terraform you might face this issue. The issue is being tracked on this link: https://github.com/hashicorp/terraform-provider-google/issues/17904 - - If you face this issue you will not be able to use terraform to create the instance, therefore, you will need to manually create it on Google Cloud Console using the same parameters. +- `Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found`. + - When creating the VertexAI Workbench Instance through terraform you might face this issue. The issue is being tracked on this [link](https://github.com/hashicorp/terraform-provider-google/issues/17904). + - If you face this issue you will not be able to use terraform to create the instance, therefore, you will need to manually create it on Google Cloud Console using the same parameters. 
From 29d363470e47c7ec8ab38e9f7b782cafa04e4f9b Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Wed, 5 Jun 2024 14:39:39 -0300 Subject: [PATCH 09/21] fix render --- examples/genai-rag-multimodal/README.md | 44 ++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index 6a6b6877..40aa949b 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -25,33 +25,33 @@ The main modifications to the original example include: - Update the `terraform.tfvars` file with values from your environment. - The code below is an example using the Development environment host VPC network, the env-level kms key for the machine learning project and the machine learning project. - ```terraform - kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" - network = "projects/prj-d-shared-restricted-83dn/global/networks/vpc-d-shared-restricted" - subnet = "projects/prj-d-shared-restricted-83dn/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1" - machine_learning_project = "prj-d-ml-machine-learning-0v09" - vector_search_vpc_project = "prj-d-shared-restricted-83dn" - ``` + ```terraform + kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" + network = "projects/prj-d-shared-restricted-83dn/global/networks/vpc-d-shared-restricted" + subnet = "projects/prj-d-shared-restricted-83dn/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1" + machine_learning_project = "prj-d-ml-machine-learning-0v09" + vector_search_vpc_project = "prj-d-shared-restricted-83dn" + ``` ### Allow file download from Google Notebook Examples Bucket on VPC-SC Perimeter When running the Notebook, you will reach a step that downloads an example PDF file from a bucket, you need to add the egress rule below on 
the VPC-SC perimeter to allow the operation. - ```yaml - - egressFrom: - identities: - - serviceAccount:rag-notebook-runner@.iam.gserviceaccount.com - egressTo: - operations: - - methodSelectors: - - method: google.storage.buckets.list - - method: google.storage.buckets.get - - method: google.storage.objects.get - - method: google.storage.objects.list - serviceName: storage.googleapis.com - resources: - - projects/200612033880 # Google Cloud Example Project - ``` +```yaml +- egressFrom: + identities: + - serviceAccount:rag-notebook-runner@.iam.gserviceaccount.com +egressTo: + operations: + - methodSelectors: + - method: google.storage.buckets.list + - method: google.storage.buckets.get + - method: google.storage.objects.get + - method: google.storage.objects.list + serviceName: storage.googleapis.com + resources: + - projects/200612033880 # Google Cloud Example Project +``` ## Usage From e0a50f2477b86d150f726b655456d842944648bf Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 6 Jun 2024 10:03:43 -0300 Subject: [PATCH 10/21] Update module --- examples/genai-rag-multimodal/README.md | 12 +++ .../multimodal_rag_langchain.ipynb | 95 ++++++++----------- examples/genai-rag-multimodal/outputs.tf | 24 +++++ 3 files changed, 78 insertions(+), 53 deletions(-) create mode 100644 examples/genai-rag-multimodal/outputs.tf diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index 40aa949b..04e3d74e 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -59,6 +59,18 @@ Once all the requirements are set up, you can begin by running and adjusting the To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open jupyterlab and upload the notebook (`multimodal_rag_langchain.ipynb`) to it. 
+### Optional: Use `terraform output` and bash command to fill in fields in the notebook + +You can save some time adjusting the notebook by running the commands below: + +```bash +sed -i "s//$(terraform output -raw private_endpoint_ip_address)/g" multimodal_rag_langchain.ipynb +sed -i "s//$(terraform output -raw host_vpc_project_id)/g" multimodal_rag_langchain.ipynb +sed -i "s//$(terraform output -raw notebook_project_id)/g" multimodal_rag_langchain.ipynb +sed -i "s//$(terraform output -raw vector_search_bucket_name)/g" multimodal_rag_langchain.ipynb +sed -i "s::$(terraform output -raw host_vpc_network):g" multimodal_rag_langchain.ipynb +``` + ## Known Issues - `Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found`. diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb index 21db4c43..242a14e9 100644 --- a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb @@ -179,11 +179,13 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "R5Xep4W9lq-Z" + }, "source": [ - "### Create Private Endpoint IP Address\n", + "### Restart current runtime\n", "\n", - "Retrieve the IP address value created when setting up Private Service Connect for Vector Search." + "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel." 
] }, { @@ -192,20 +194,20 @@ "metadata": {}, "outputs": [], "source": [ - "PRIVATE_ENDPOINT_IP_ADDRESS=" + "# Restart kernel after installs so that your environment can access the new packages\n", + "import IPython\n", + "\n", + "app = IPython.Application.instance()\n", + "app.kernel.do_shutdown(True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Update Project Allowlist\n", - "\n", - "Insert the values of the Host Shared VPC Project and the current notebook project on the cell below.\n", - "\n", - "For example: `PROJECT_ALLOWLIST=[\"prj-d-shared-restricted-83dn\",\"prj-d-ml-machine-learning-0v09\"]` \n", + "### Create Private Endpoint IP Address\n", "\n", - "Remember that these values will be different on your environment" + "Retrieve the IP address value created when setting up Private Service Connect for Vector Search. You can retrieve this value by running `terraform output` on the example." ] }, { @@ -214,48 +216,29 @@ "metadata": {}, "outputs": [], "source": [ - "PROJECT_ALLOWLIST=[\"\", \"\"]" + "PRIVATE_ENDPOINT_IP_ADDRESS=\"\"" ] }, { "cell_type": "markdown", - "metadata": { - "id": "R5Xep4W9lq-Z" - }, + "metadata": {}, "source": [ - "### Restart current runtime\n", + "### Update Project Allowlist\n", "\n", - "To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel." 
+ "Insert the values of the Host Shared VPC Project and the current notebook project on the cell below.\n", + "\n", + "For example: `PROJECT_ALLOWLIST=[\"prj-d-shared-restricted-83dn\",\"prj-d-ml-machine-learning-0v09\"]` \n", + "\n", + "Remember that these values will be different on your environment" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "executionInfo": { - "elapsed": 646, - "status": "ok", - "timestamp": 1707913872852, - "user": { - "displayName": "", - "userId": "" - }, - "user_tz": -330 - }, - "id": "XRvKdaPDTznN", - "outputId": "759357fb-7531-4423-a75f-b896af19ce37", - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ - "# Restart kernel after installs so that your environment can access the new packages\n", - "import IPython\n", - "\n", - "app = IPython.Application.instance()\n", - "app.kernel.do_shutdown(True)" + "PROJECT_ALLOWLIST=[\"\", \"\"]" ] }, { @@ -302,7 +285,7 @@ }, "outputs": [], "source": [ - "PROJECT_ID = \"\" # @param {type:\"string\"}\n", + "PROJECT_ID = \"\" # @param {type:\"string\"}\n", "LOCATION = \"us-central1\" # @param {type:\"string\"}\n", "\n", "# For Vector Search Staging\n", @@ -716,27 +699,33 @@ "metadata": {}, "outputs": [], "source": [ - "index_endpoint.deployed_indexes.private_endpoints.service_attachment" + "SERVICE_ATTACHMENT=index_endpoint.deployed_indexes.private_endpoints.service_attachment\n", + "\n", + "SERVICE_ATTACHMENT" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "* Retrieve the value of the service attachment and execute the command below on your local machine:\n", - "\n", - "```bash\n", - "NETWORK=\n", - "NETWORK_PROJECT_ID=\n", - "SERVICE_ATTACHMENT=\n", + "* Retrieve the value of the service attachment and execute the command below on your local machine:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "NETWORK=\"\"\n", + 
"NETWORK_PROJECT_ID=\"\"\n", "\n", - "gcloud compute forwarding-rules create vector-search-endpoint \\\n", - " --network=$NETWORK \\\n", + "!gcloud compute forwarding-rules create vector-search-endpoint \\\n", + " --network={NETWORK} \\\n", " --address=vector-search-endpoint \\\n", - " --target-service-attachment=$SERVICE_ATTACHMENT \\\n", - " --project=$NETWORK_PROJECT_ID \\\n", - " --region=us-central1\n", - "```" + " --target-service-attachment={SERVICE_ATTACHMENT} \\\n", + " --project={NETWORK_PROJECT_ID} \\\n", + " --region=us-central1" ] }, { diff --git a/examples/genai-rag-multimodal/outputs.tf b/examples/genai-rag-multimodal/outputs.tf new file mode 100644 index 00000000..5a5a9b1a --- /dev/null +++ b/examples/genai-rag-multimodal/outputs.tf @@ -0,0 +1,24 @@ +output "private_endpoint_ip_address" { + description = "The private IP address of the vector search endpoint" + value = google_compute_address.vector_search_static_ip.address +} + +output "host_vpc_project_id" { + description = "This is the Project ID where the Host VPC network is located" + value = var.vector_search_vpc_project +} + +output "host_vpc_network" { + description = "This is the Self-link of the Host VPC network" + value = var.network +} + +output "notebook_project_id" { + description = "The Project ID where the notebook will be run on" + value = var.machine_learning_project +} + +output "vector_search_bucket_name" { + description = "The name of the bucket that Vector Search will use" + value = google_storage_bucket.vector_search_bucket.name +} From d1a02dc5e0836f5d70e394417b63efcfd24808ec Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 6 Jun 2024 11:35:07 -0300 Subject: [PATCH 11/21] add regional bucket and fix deployed index access --- .../genai-rag-multimodal/multimodal_rag_langchain.ipynb | 2 +- .../genai-rag-multimodal/multimodal_rag_langchain_infra.tf | 3 ++- examples/genai-rag-multimodal/variables.tf | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git 
a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb index 242a14e9..7e1cea33 100644 --- a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb @@ -699,7 +699,7 @@ "metadata": {}, "outputs": [], "source": [ - "SERVICE_ATTACHMENT=index_endpoint.deployed_indexes.private_endpoints.service_attachment\n", + "SERVICE_ATTACHMENT=index_endpoint.deployed_indexes[0].private_endpoints.service_attachment\n", "\n", "SERVICE_ATTACHMENT" ] diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf index 438a439d..dd13746f 100644 --- a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf @@ -75,7 +75,8 @@ resource "random_string" "suffix" { resource "google_storage_bucket" "vector_search_bucket" { name = "vector-search-${random_string.suffix.result}" - location = "US" + location = var.vector_search_bucket_location + storage_class = "REGIONAL" project = var.machine_learning_project uniform_bucket_level_access = true } diff --git a/examples/genai-rag-multimodal/variables.tf b/examples/genai-rag-multimodal/variables.tf index f77180e4..ac3fe55c 100644 --- a/examples/genai-rag-multimodal/variables.tf +++ b/examples/genai-rag-multimodal/variables.tf @@ -4,6 +4,12 @@ variable "service_account_name" { default = "rag-notebook-runner" } +variable "vector_search_bucket_location" { + description = "Bucket Region" + type = string + default = "US-CENTRAL1" +} + variable "machine_learning_project" { description = "Machine Learning Project ID" type = string From a28de6879f192e413e70bf5f7ee0e00847fc30a5 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 6 Jun 2024 11:57:26 -0300 Subject: [PATCH 12/21] add Google Header to file --- examples/genai-rag-multimodal/outputs.tf | 16 
++++++++++++++++ examples/genai-rag-multimodal/variables.tf | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/examples/genai-rag-multimodal/outputs.tf b/examples/genai-rag-multimodal/outputs.tf index 5a5a9b1a..2ed792a4 100644 --- a/examples/genai-rag-multimodal/outputs.tf +++ b/examples/genai-rag-multimodal/outputs.tf @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + output "private_endpoint_ip_address" { description = "The private IP address of the vector search endpoint" value = google_compute_address.vector_search_static_ip.address diff --git a/examples/genai-rag-multimodal/variables.tf b/examples/genai-rag-multimodal/variables.tf index ac3fe55c..8c3ff9da 100644 --- a/examples/genai-rag-multimodal/variables.tf +++ b/examples/genai-rag-multimodal/variables.tf @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + variable "service_account_name" { description = "The name of the service account" type = string From fcbd3cc6dbfe93d061a00aa5619be2f2c147646d Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 6 Jun 2024 12:40:54 -0300 Subject: [PATCH 13/21] update README.md with terraform docs --- examples/genai-rag-multimodal/README.md | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index 04e3d74e..396fad50 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -76,3 +76,33 @@ sed -i "s::$(terraform output -raw host_vpc_network):g" - `Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found`. - When creating the VertexAI Workbench Instance through terraform you might face this issue. The issue is being tracked on this [link](https://github.com/hashicorp/terraform-provider-google/issues/17904). - If you face this issue you will not be able to use terraform to create the instance, therefore, you will need to manually create it on Google Cloud Console using the same parameters. 
+ + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| instance\_location | Vertex Workbench Instance Location | `string` | `"us-central1-a"` | no | +| kms\_key | The KMS key to use for disk encryption | `string` | n/a | yes | +| machine\_learning\_project | Machine Learning Project ID | `string` | n/a | yes | +| machine\_name | The name of the machine instance | `string` | `"rag-notebook-instance"` | no | +| machine\_type | The type of machine to use for the instance | `string` | `"e2-standard-2"` | no | +| network | The Host VPC network ID to connect the instance to | `string` | n/a | yes | +| service\_account\_name | The name of the service account | `string` | `"rag-notebook-runner"` | no | +| subnet | The subnet ID within the Host VPC network to use in Vertex Workbench and Private Service Connect | `string` | n/a | yes | +| vector\_search\_address\_name | The name of the address to create | `string` | `"vector-search-endpoint"` | no | +| vector\_search\_bucket\_location | Bucket Region | `string` | `"US-CENTRAL1"` | no | +| vector\_search\_ip\_region | The region to create the address in | `string` | `"us-central1"` | no | +| vector\_search\_vpc\_project | The project ID where the Host VPC network is located | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| host\_vpc\_network | This is the Self-link of the Host VPC network | +| host\_vpc\_project\_id | This is the Project ID where the Host VPC network is located | +| notebook\_project\_id | The Project ID where the notebook will be run on | +| private\_endpoint\_ip\_address | The private IP address of the vector search endpoint | +| vector\_search\_bucket\_name | The name of the bucket that Vector Search will use | + + \ No newline at end of file From 8d9e167897f65ab30e5a873f0c1f60467df3b714 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 6 Jun 2024 14:42:32 -0300 Subject: [PATCH 14/21] lint 
fixes --- examples/genai-rag-multimodal/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index 396fad50..2291fd1a 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -67,7 +67,7 @@ You can save some time adjusting the notebook by running the commands below: sed -i "s//$(terraform output -raw private_endpoint_ip_address)/g" multimodal_rag_langchain.ipynb sed -i "s//$(terraform output -raw host_vpc_project_id)/g" multimodal_rag_langchain.ipynb sed -i "s//$(terraform output -raw notebook_project_id)/g" multimodal_rag_langchain.ipynb -sed -i "s//$(terraform output -raw vector_search_bucket_name)/g" multimodal_rag_langchain.ipynb +sed -i "s//$(terraform output -raw vector_search_bucket_name)/g" multimodal_rag_langchain.ipynb sed -i "s::$(terraform output -raw host_vpc_network):g" multimodal_rag_langchain.ipynb ``` @@ -105,4 +105,5 @@ sed -i "s::$(terraform output -raw host_vpc_network):g" | private\_endpoint\_ip\_address | The private IP address of the vector search endpoint | | vector\_search\_bucket\_name | The name of the bucket that Vector Search will use | - \ No newline at end of file + + From 31f8e499bee9dc0601c45cd83452963ee4849994 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Fri, 7 Jun 2024 09:28:49 -0300 Subject: [PATCH 15/21] implement PR review changes --- examples/genai-rag-multimodal/README.md | 98 ++++++++++++------- .../multimodal_rag_langchain_infra.tf | 2 +- 2 files changed, 65 insertions(+), 35 deletions(-) diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index 2291fd1a..5f0c8421 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -1,12 +1,12 @@ -# Multimodal RAG Langhain Example +# Multimodal RAG Langchain Example ## Overview -Retrieval augmented generation (RAG) has become a popular 
paradigm for enabling LLMs to access external data and also as a mechanism for grounding to mitigate against hallucinations. +Retrieval Augmented Generation (RAG) has become a popular paradigm for enabling LLMs to access external data and also as a mechanism for [Grounding](https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/overview), to mitigate against hallucinations. In this notebook, you will perform multimodal RAG by performing Q&A over a financial document filled with both text and images. -This example is an adapted version of the sample Generative AI notebook from the Google Cloud codebase. You can find the original example and other notebooks in the following repository: [Google Cloud Platform Generative AI](https://github.com/GoogleCloudPlatform/generative-ai/tree/main). +This example is an adapted version of the sample Generative AI notebook from the Google Cloud codebase. You can find the original example and other notebooks in the [Google Cloud Platform Generative AI](https://github.com/GoogleCloudPlatform/generative-ai/tree/main) repository. The main modifications to the original example include: @@ -17,21 +17,27 @@ The main modifications to the original example include: ## Requirements -- Terraform -- Authenticated Google Cloud SDK +- Terraform v1.7.5 +- [Authenticated Google Cloud SDK 469.0.0](https://cloud.google.com/sdk/docs/authorizing) ### Provision Infrastructure with Terraform - Update the `terraform.tfvars` file with values from your environment. - - The code below is an example using the Development environment host VPC network, the env-level kms key for the machine learning project and the machine learning project. 
- ```terraform - kms_key = "projects/prj-d-kms-cau3/locations/us-central1/keyRings/ml-env-keyring/cryptoKeys/prj-d-ml-machine-learning" - network = "projects/prj-d-shared-restricted-83dn/global/networks/vpc-d-shared-restricted" - subnet = "projects/prj-d-shared-restricted-83dn/regions/us-central1/subnetworks/sb-d-shared-restricted-us-central1" - machine_learning_project = "prj-d-ml-machine-learning-0v09" - vector_search_vpc_project = "prj-d-shared-restricted-83dn" - ``` + ```terraform + kms_key = "projects/KMS-PROJECT-ID/locations/REGION/keyRings/ML-ENV-KEYRING/cryptoKeys/ML-ENV-KEY" + network = "projects/NETWORK-PROJECT-ID/global/networks/NETWORK-NAME" + subnet = "projects/NETWORK-PROJECT-ID/regions/REGION/subnetworks/SUBNET-NAME" + machine_learning_project = "MACHINE-LEARNING-PROJECT-ID" + vector_search_vpc_project = "NETWORK-PROJECT-ID" + ``` + +- Assuming you are deploying the example on top of the development environment, the following instructions will provide you more insight on how to retrieve these values: + - **NETWORK-PROJECT-ID**: Run `terraform output -raw restricted_host_project_id` on `gcp-networks` repository, inside the development environment directory and branch. + - **NETWORK-NAME**: Run `terraform output -raw restricted_network_name` on `gcp-networks` repository, inside the development environment directory and branch. + - **MACHINE-LEARNING-PROJECT-ID**: Run `terraform output -raw machine_learning_project_id` on `gcp-projects` repository, inside the Machine Learning business unit directory and on the development branch. + - **KMS-PROJECT-ID**, **ML-ENV-KEYRING**, **ML-ENV-KEY**: Run `terraform output machine_learning_kms_keys` on `gcp-projects` repository, inside the Machine Learning business unit directory and on the development branch. + - **REGION**: The chosen region. 
### Allow file download from Google Notebook Examples Bucket on VPC-SC Perimeter @@ -41,41 +47,66 @@ When running the Notebook, you will reach a step that downloads an example PDF f - egressFrom: identities: - serviceAccount:rag-notebook-runner@.iam.gserviceaccount.com -egressTo: - operations: - - methodSelectors: - - method: google.storage.buckets.list - - method: google.storage.buckets.get - - method: google.storage.objects.get - - method: google.storage.objects.list - serviceName: storage.googleapis.com - resources: - - projects/200612033880 # Google Cloud Example Project + egressTo: + operations: + - methodSelectors: + - method: google.storage.buckets.list + - method: google.storage.buckets.get + - method: google.storage.objects.get + - method: google.storage.objects.list + serviceName: storage.googleapis.com + resources: + - projects/200612033880 # Google Cloud Example Project ``` ## Usage -Once all the requirements are set up, you can begin by running and adjusting the notebook step-by-step. +Once all the requirements are set up, you can start by running and adjusting the notebook step-by-step. -To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open jupyterlab and upload the notebook (`multimodal_rag_langchain.ipynb`) to it. +To run the notebook, open the Google Cloud Console on Vertex AI Workbench, open JupyterLab and upload the notebook (`multimodal_rag_langchain.ipynb`) to it. 
### Optional: Use `terraform output` and bash command to fill in fields in the notebook You can save some time adjusting the notebook by running the commands below: -```bash -sed -i "s//$(terraform output -raw private_endpoint_ip_address)/g" multimodal_rag_langchain.ipynb -sed -i "s//$(terraform output -raw host_vpc_project_id)/g" multimodal_rag_langchain.ipynb -sed -i "s//$(terraform output -raw notebook_project_id)/g" multimodal_rag_langchain.ipynb -sed -i "s//$(terraform output -raw vector_search_bucket_name)/g" multimodal_rag_langchain.ipynb -sed -i "s::$(terraform output -raw host_vpc_network):g" multimodal_rag_langchain.ipynb -``` +- Extract values from `terraform output` and validate. + + ```bash + export private_endpoint_ip_address=$(terraform output -raw private_endpoint_ip_address) + echo private_endpoint_ip_address=$private_endpoint_ip_address + + export host_vpc_project_id=$(terraform output -raw host_vpc_project_id) + echo host_vpc_project_id=$host_vpc_project_id + + export notebook_project_id=$(terraform output -raw notebook_project_id) + echo notebook_project_id=$notebook_project_id + + export vector_search_bucket_name=$(terraform output -raw vector_search_bucket_name) + echo vector_search_bucket_name=$vector_search_bucket_name + + export host_vpc_network=$(terraform output -raw host_vpc_network) + echo host_vpc_network=$host_vpc_network + ``` + +- Search and Replace using `sed` command. + + ```bash + sed -i "s//$private_endpoint_ip_address/g" multimodal_rag_langchain.ipynb + + sed -i "s//$host_vpc_project_id/g" multimodal_rag_langchain.ipynb + + sed -i "s//$notebook_project_id/g" multimodal_rag_langchain.ipynb + + sed -i "s//$vector_search_bucket_name/g" multimodal_rag_langchain.ipynb + + sed -i "s::$host_vpc_network:g" multimodal_rag_langchain.ipynb + ``` ## Known Issues - `Error: Error creating Instance: googleapi: Error 400: value_to_check(https://compute.googleapis.com/compute/v1/projects/...) is not found`. 
- When creating the VertexAI Workbench Instance through terraform you might face this issue. The issue is being tracked on this [link](https://github.com/hashicorp/terraform-provider-google/issues/17904). - - If you face this issue you will not be able to use terraform to create the instance, therefore, you will need to manually create it on Google Cloud Console using the same parameters. + - If you face this issue, you will not be able to use Terraform to create the instance; you will need to create it manually in the [Google Cloud Console](https://console.cloud.google.com/vertex-ai/workbench/instances) using the same parameters. ## Inputs @@ -106,4 +137,3 @@ sed -i "s::$(terraform output -raw host_vpc_network):g" | vector\_search\_bucket\_name | The name of the bucket that Vector Search will use | - diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf index dd13746f..6b5e7871 100644 --- a/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain_infra.tf @@ -68,7 +68,7 @@ resource "google_workbench_instance" "instance" { } resource "random_string" "suffix" { - length = 4 + length = 10 special = false upper = false } From 4d97ed94e1f9a20a7e04d413d7aa2cdbef473c46 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Fri, 7 Jun 2024 09:37:52 -0300 Subject: [PATCH 16/21] update host_vpc projectid --- examples/genai-rag-multimodal/outputs.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/genai-rag-multimodal/outputs.tf b/examples/genai-rag-multimodal/outputs.tf index 2ed792a4..7ad847ca 100644 --- a/examples/genai-rag-multimodal/outputs.tf +++ b/examples/genai-rag-multimodal/outputs.tf @@ -21,7 +21,7 @@ output "private_endpoint_ip_address" { output "host_vpc_project_id" { description = "This is the Project ID where the Host VPC network is located" - value = 
var.vector_search_vpc_project + value = google_compute_address.vector_search_static_ip.project } output "host_vpc_network" { From bf4210876e1384d435d1766d8064b62081ced3e2 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Wed, 12 Jun 2024 09:29:20 -0300 Subject: [PATCH 17/21] add RAGAS evaluation --- .../multimodal_rag_langchain.ipynb | 173 +++++++++++++++++- 1 file changed, 171 insertions(+), 2 deletions(-) diff --git a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb index 7e1cea33..4c138eed 100644 --- a/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb +++ b/examples/genai-rag-multimodal/multimodal_rag_langchain.ipynb @@ -599,7 +599,7 @@ "\n", "\n", "# Image summaries\n", - "img_base64_list, image_summaries = generate_img_summaries(\".\")" + "img_base64_list, image_summaries = generate_img_summaries(\"./intro_multimodal_rag_old_version\")" ] }, { @@ -824,8 +824,17 @@ " for i, s in enumerate(text_summaries + table_summaries + image_summaries)\n", "]\n", "\n", - "retriever_multi_vector_img.docstore.mset(list(zip(doc_ids, doc_contents)))\n", + "list_of_docs = list(zip(doc_ids, doc_contents))\n", "\n", + "retriever_multi_vector_img.docstore.mset(list_of_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# If using Vertex AI Vector Search, this will take a while to complete.\n", "# You can cancel this cell and continue later.\n", "retriever_multi_vector_img.vectorstore.add_documents(summary_docs)" @@ -1000,6 +1009,166 @@ "Markdown(result)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### RAGAS Evaluation\n", + "\n", + "On the cells below we will be using RAGAS to evaluate the RAG pipeline for text-based context." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install ragas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "questions = [\n", + " \"How did COVID-19 initially impact Google's advertising revenue in 2020?\",\n", + " \"How did Google's advertising revenue recover from the initial COVID-19 impact?\",\n", + " \"What was the primary driver of Google's operating cash flow in 2020?\",\n", + " \"How did Google's share repurchases compare to the previous year in 2020?\"\n", + "]\n", + "\n", + "golden_answers = [\n", + " \"COVID-19 initially impacted Google's advertising revenue in 2020 in two ways, Users searched for less commercially-driven topics, reducing the relevance and value of ads displayed and Businesses cut back on advertising budgets due to the economic downturn caused by the pandemic.\",\n", + " \"Google's advertising revenue recovered from the initial COVID-19 impact through a combination of factors, User search activity shifted back to more commercially-driven topics, increasing the effectiveness of advertising and As the economic climate improved, businesses began to invest more heavily in advertising again.\",\n", + " \"The primary driver of Google's operating cash flow in 2020 was revenue generated from its advertising products, totaling $91.7 billion\",\n", + " \"Google's share repurchases in 2020 were $50.3 billion, reflecting a significant increase of 62% compared to the prior year.\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def summarize_image_context(doc_base64):\n", + " prompt = \"\"\"You are an assistant tasked with summarizing images for retrieval. \\\n", + " These summaries will be embedded and used to retrieve the raw image. 
\\\n", + " Give a concise summary of the image that is well optimized for retrieval.\n", + " If it's a table, extract all elements of the table.\n", + " If it's a graph, explain the findings in the graph.\n", + " Do not include any numbers that are not mentioned in the image.\n", + " \"\"\"\n", + " return image_summarize(doc_base64, prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_samples = {\n", + " \"contexts\": [],\n", + " \"question\": [],\n", + " \"answer\": [],\n", + " \"ground_truth\": []\n", + " }\n", + "\n", + "for i, question in enumerate(questions): \n", + " docs = retriever_multi_vector_img.get_relevant_documents(question, limit=10) \n", + " image_contexts = []\n", + " \n", + " source_docs = split_image_text_types(docs)\n", + " \n", + " if len(source_docs[\"images\"]) > 0: \n", + " for image in source_docs[\"images\"]:\n", + " image_contexts.append(summarize_image_context(image))\n", + " \n", + " text_context = source_docs[\"texts\"]\n", + " \n", + " data_samples[\"contexts\"].append(text_context + image_contexts)\n", + " data_samples[\"question\"].append(question)\n", + " data_samples[\"answer\"].append(chain_multimodal_rag.invoke(question))\n", + " data_samples[\"ground_truth\"].append(golden_answers[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datasets import Dataset\n", + "\n", + "dataset = Dataset.from_dict(data_samples)\n", + "\n", + "\n", + "from ragas.metrics import (\n", + " context_precision,\n", + " answer_relevancy,\n", + " faithfulness,\n", + " context_recall,\n", + " answer_similarity,\n", + " answer_correctness,\n", + ")\n", + "from ragas.metrics.critique import harmfulness\n", + "\n", + "# list of metrics we're going to use\n", + "metrics = [\n", + " faithfulness,\n", + " answer_relevancy,\n", + " context_recall,\n", + " context_precision,\n", + " harmfulness,\n", + " 
answer_similarity,\n", + " answer_correctness,\n", + "]\n", + "\n", + "from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings\n", + "\n", + "config = { \n", + " \"chat_model_id\": \"gemini-1.0-pro-002\",\n", + " \"embedding_model_id\": \"textembedding-gecko\",\n", + "}\n", + "\n", + "\n", + "vertextai_llm = ChatVertexAI(model_name=config[\"chat_model_id\"],)\n", + "vertextai_embeddings = VertexAIEmbeddings(model_name=config[\"embedding_model_id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from ragas import evaluate\n", + "\n", + "result = evaluate(\n", + " dataset, # using 1 as example due to quota constraints\n", + " metrics=metrics,\n", + " llm=vertextai_llm,\n", + " embeddings=vertextai_embeddings,\n", + ")\n", + "\n", + "result.to_pandas()" + ] + }, { "cell_type": "markdown", "metadata": { From 5012f60d9ad6f9ca43c636286d5018e79989c379 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 13 Jun 2024 17:06:10 -0300 Subject: [PATCH 18/21] add readme steps --- examples/genai-rag-multimodal/README.md | 102 ++++++++++++++++++++++ examples/genai-rag-multimodal/versions.tf | 7 ++ 2 files changed, 109 insertions(+) create mode 100644 examples/genai-rag-multimodal/versions.tf diff --git a/examples/genai-rag-multimodal/README.md b/examples/genai-rag-multimodal/README.md index 5f0c8421..63f1f0a3 100644 --- a/examples/genai-rag-multimodal/README.md +++ b/examples/genai-rag-multimodal/README.md @@ -59,6 +59,108 @@ When running the Notebook, you will reach a step that downloads an example PDF f - projects/200612033880 # Google Cloud Example Project ``` +## Deploying infrastructure using Machine Learning Infra Pipeline + +### Required Permissions for pipeline Service Account + +- Give `roles/compute.networkUser` to the Service Account that runs 
the Pipeline. + + ```bash + SERVICE_ACCOUNT=$(terraform -chdir="./gcp-projects/ml_business_unit/shared" output -json terraform_service_accounts | jq -r '."ml-machine-learning"') + + gcloud projects add-iam-policy-binding --member="serviceAccount:$SERVICE_ACCOUNT" --role="roles/compute.networkUser" + ``` + +- Add the following ingress rule to the Service Perimeter. + + ```yaml + ingressPolicies: + - ingressFrom: + identities: + - serviceAccount: + sources: + - accessLevel: '*' + ingressTo: + operations: + - serviceName: '*' + resources: + - '*' + ``` + +### Deployment steps + +**IMPORTANT:** The steps below assume your current working directory is the one that contains `terraform-google-enterprise-genai/` and the other repos (`gcp-bootstrap`, `gcp-org`, `gcp-projects`...). + +- Retrieve the Project ID where the Machine Learning Pipeline Repository is located. + + ```bash + export INFRA_PIPELINE_PROJECT_ID=$(terraform -chdir="gcp-projects/ml_business_unit/shared/" output -raw cloudbuild_project_id) + echo ${INFRA_PIPELINE_PROJECT_ID} + ``` + +- Clone the repository. + + ```bash + gcloud source repos clone ml-machine-learning --project=${INFRA_PIPELINE_PROJECT_ID} + ``` + +- Navigate into the repo and check out the desired branch. Create directories if they don't exist. + + ```bash + cd ml-machine-learning + git checkout -b development + + mkdir -p ml_business_unit/development + mkdir -p modules + ``` + +- Copy required files to the repository. + + ```bash + cp -R ../terraform-google-enterprise-genai/examples/genai-rag-multimodal ./modules + cp ../terraform-google-enterprise-genai/build/cloudbuild-tf-* . + cp ../terraform-google-enterprise-genai/build/tf-wrapper.sh . 
+ chmod 755 ./tf-wrapper.sh + + cat ../terraform-google-enterprise-genai/examples/genai-rag-multimodal/terraform.tfvars >> ml_business_unit/development/genai_example.auto.tfvars + cat ../terraform-google-enterprise-genai/examples/genai-rag-multimodal/variables.tf >> ml_business_unit/development/variables.tf + ``` + + > NOTE: Make sure there are no variable name collisions for variables under `terraform-google-enterprise-genai/examples/genai-rag-multimodal/variables.tf` and that your `terraform.tfvars` is updated with values from your environment. + +- Validate that variables under `ml_business_unit/development/genai_example.auto.tfvars` are correct. + + ```bash + cat ml_business_unit/development/genai_example.auto.tfvars + ``` + +- Create a file named `genai_example.tf` under `ml_business_unit/development` path that calls the module. + + ```terraform + module "genai_example" { + source = "../../modules/genai-rag-multimodal" + + kms_key = var.kms_key + network = var.network + subnet = var.subnet + machine_learning_project = var.machine_learning_project + vector_search_vpc_project = var.vector_search_vpc_project + } + ``` + +- Commit and push + + ```bash + git add . + git commit -m "Add GenAI example" + + git push origin development + ``` + +## Deploying infrastructure using terraform locally + +Run `terraform init && terraform apply -auto-approve`. + ## Usage Once all the requirements are set up, you can start by running and adjusting the notebook step-by-step. 
diff --git a/examples/genai-rag-multimodal/versions.tf b/examples/genai-rag-multimodal/versions.tf new file mode 100644 index 00000000..103800f9 --- /dev/null +++ b/examples/genai-rag-multimodal/versions.tf @@ -0,0 +1,7 @@ +terraform { + required_providers { + google = { + version = "~> 5.33.0" + } + } +} From 61d4ecf8457dc2663ddd7777381ff364f7570b17 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Thu, 13 Jun 2024 17:23:10 -0300 Subject: [PATCH 19/21] add missing header --- examples/genai-rag-multimodal/versions.tf | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/examples/genai-rag-multimodal/versions.tf b/examples/genai-rag-multimodal/versions.tf index 103800f9..c76928c9 100644 --- a/examples/genai-rag-multimodal/versions.tf +++ b/examples/genai-rag-multimodal/versions.tf @@ -1,3 +1,19 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + terraform { required_providers { google = { From d981e1a6ef4ab711e48d2f7ecf16aa898c0df080 Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Tue, 18 Jun 2024 11:26:14 -0300 Subject: [PATCH 20/21] update version 5.34 --- examples/genai-rag-multimodal/versions.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/genai-rag-multimodal/versions.tf b/examples/genai-rag-multimodal/versions.tf index c76928c9..c7847f29 100644 --- a/examples/genai-rag-multimodal/versions.tf +++ b/examples/genai-rag-multimodal/versions.tf @@ -17,7 +17,7 @@ terraform { required_providers { google = { - version = "~> 5.33.0" + version = "~> 5.34.0" } } } From 10f8c0a2dee0660f19345e7644171dc6efcdb49e Mon Sep 17 00:00:00 2001 From: caetano-colin Date: Wed, 19 Jun 2024 10:46:19 -0300 Subject: [PATCH 21/21] update outputs.tf --- examples/genai-rag-multimodal/outputs.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/genai-rag-multimodal/outputs.tf b/examples/genai-rag-multimodal/outputs.tf index 7ad847ca..1cd4ae7a 100644 --- a/examples/genai-rag-multimodal/outputs.tf +++ b/examples/genai-rag-multimodal/outputs.tf @@ -26,12 +26,12 @@ output "host_vpc_project_id" { output "host_vpc_network" { description = "This is the Self-link of the Host VPC network" - value = var.network + value = google_workbench_instance.instance.gce_setup[0].network_interfaces[0].network } output "notebook_project_id" { description = "The Project ID where the notebook will be run on" - value = var.machine_learning_project + value = google_workbench_instance.instance.project } output "vector_search_bucket_name" {