Commit 4bcb5a2

Merge pull request #241 from nitya/aitour-2025-refresh
Aitour 2025 refresh
nitya authored Jan 24, 2025
2 parents 8c1c3dc + 136a287 commit 4bcb5a2
Showing 60 changed files with 1,544 additions and 647 deletions.
10 changes: 3 additions & 7 deletions .devcontainer/devcontainer.json
@@ -7,17 +7,13 @@
"context": ".."
},
"features": {
"ghcr.io/devcontainers/features/azure-cli:1": {
"installBicep": true,
"extensions": "ml"
},
"ghcr.io/devcontainers/features/azure-cli:1": {},
"ghcr.io/devcontainers/features/git:1": {},
"ghcr.io/azure/azure-dev/azd:latest": {},
"ghcr.io/devcontainers/features/docker-in-docker:2": {},
"ghcr.io/devcontainers/features/github-cli:1": {},
"ghcr.io/devcontainers/features/node:1": {
"version": "22.8.0"
}
"ghcr.io/devcontainers/features/node:1": {},
"ghcr.io/azure/azure-dev/azd:0": {}
},
"customizations": {
"vscode": {
3 changes: 2 additions & 1 deletion .vscode/settings.json
@@ -1,3 +1,4 @@
 {
-    "python.terminal.activateEnvironment": true
+    "python.terminal.activateEnvironment": true,
+    "prompty.currentModelConfiguration": "default"
 }
4 changes: 2 additions & 2 deletions azure.yaml
@@ -12,12 +12,12 @@ hooks:
   postprovision:
     posix:
       shell: sh
-      continueOnError: false
+      continueOnError: true
       interactive: true
       run: infra/hooks/postprovision.sh
     windows:
       shell: pwsh
-      continueOnError: false
+      continueOnError: true
       interactive: true
       run: infra/hooks/postprovision.ps1
 infra:
210 changes: 210 additions & 0 deletions data/scripts/create-azure-search.py
@@ -0,0 +1,210 @@
#!/usr/bin/env python
# coding: utf-8

# # Generating your product search index
# This notebook is designed to automatically create the product search index for you. It uses the [product catalog](products.csv) file to create the index. In order to do so, it needs the names and keys for the following services:
#
# - Azure Search Service
# - Azure OpenAI Service
#
# You can find the names and keys in the Azure Portal. These need to be entered in a `.env` file in the root of this repository. The `.env` file is not checked in to source control. You can use the [`.env.sample`](../../.env.sample) file as a template.
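#
# A minimal sketch of the relevant `.env` entries (placeholder values, not real
# endpoints; the variable names are the ones this script reads via os.environ):
#
#     AZURE_SEARCH_ENDPOINT=https://<your-search-service>.search.windows.net
#     AZURE_OPENAI_ENDPOINT=https://<your-openai-resource>.openai.azure.com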

# In[1]:


import os
import pandas as pd
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    HnswParameters,
    HnswAlgorithmConfiguration,
    SemanticPrioritizedFields,
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SearchIndex,
    SemanticSearch,
    SemanticConfiguration,
    SemanticField,
    SimpleField,
    VectorSearch,
    VectorSearchAlgorithmKind,
    VectorSearchAlgorithmMetric,
    ExhaustiveKnnAlgorithmConfiguration,
    ExhaustiveKnnParameters,
    VectorSearchProfile,
)
from typing import Any, Dict, List
from openai import AzureOpenAI
from dotenv import load_dotenv

from pathlib import Path

load_dotenv()


# In[2]:


def delete_index(search_index_client: SearchIndexClient, search_index: str):
print(f"deleting index {search_index}")
search_index_client.delete_index(search_index)


# In[3]:


def create_index_definition(name: str) -> SearchIndex:
"""
Returns an Azure Cognitive Search index with the given name.
"""
# The fields we want to index. The "embedding" field is a vector field that will
# be used for vector search.
fields = [
SimpleField(name="id", type=SearchFieldDataType.String, key=True),
SearchableField(name="content", type=SearchFieldDataType.String),
SimpleField(name="filepath", type=SearchFieldDataType.String),
SearchableField(name="title", type=SearchFieldDataType.String),
SimpleField(name="url", type=SearchFieldDataType.String),
SearchField(
name="contentVector",
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
searchable=True,
# Size of the vector created by the text-embedding-ada-002 model.
vector_search_dimensions=1536,
vector_search_profile_name="myHnswProfile",
),
]

# The "content" field should be prioritized for semantic ranking.
semantic_config = SemanticConfiguration(
name="default",
prioritized_fields=SemanticPrioritizedFields(
title_field=SemanticField(field_name="title"),
keywords_fields=[],
content_fields=[SemanticField(field_name="content")],
),
)

# For vector search, we want to use the HNSW (Hierarchical Navigable Small World)
# algorithm (a type of approximate nearest neighbor search algorithm) with cosine
# distance.
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="myHnsw",
kind=VectorSearchAlgorithmKind.HNSW,
parameters=HnswParameters(
m=4,
ef_construction=400,
ef_search=500,
metric=VectorSearchAlgorithmMetric.COSINE,
),
),
ExhaustiveKnnAlgorithmConfiguration(
name="myExhaustiveKnn",
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
parameters=ExhaustiveKnnParameters(
metric=VectorSearchAlgorithmMetric.COSINE
),
),
],
profiles=[
VectorSearchProfile(
name="myHnswProfile",
algorithm_configuration_name="myHnsw",
),
VectorSearchProfile(
name="myExhaustiveKnnProfile",
algorithm_configuration_name="myExhaustiveKnn",
),
],
)

# Create the semantic settings with the configuration
semantic_search = SemanticSearch(configurations=[semantic_config])

# Create the search index.
index = SearchIndex(
name=name,
fields=fields,
semantic_search=semantic_search,
vector_search=vector_search,
)

return index


# In[4]:


def gen_contoso_products(
    path: str,
) -> List[Dict[str, Any]]:
    openai_service_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
    openai_deployment = "text-embedding-ada-002"

    token_provider = get_bearer_token_provider(
        DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    # openai.Embedding.create() -> client.embeddings.create()
    client = AzureOpenAI(
        api_version="2023-07-01-preview",
        azure_endpoint=openai_service_endpoint,
        azure_deployment=openai_deployment,
        azure_ad_token_provider=token_provider,
    )

    products = pd.read_csv(path)
    items = []
    for product in products.to_dict("records"):
        content = product["description"]
        id = str(product["id"])
        title = product["name"]
        url = f"/products/{title.lower().replace(' ', '-')}"
        emb = client.embeddings.create(input=content, model=openai_deployment)
        rec = {
            "id": id,
            "content": content,
            "filepath": f"{title.lower().replace(' ', '-')}",
            "title": title,
            "url": url,
            "contentVector": emb.data[0].embedding,
        }
        items.append(rec)

    return items


# In[5]:


contoso_search = os.environ["AZURE_SEARCH_ENDPOINT"]
index_name = "contoso-products"

search_index_client = SearchIndexClient(
    contoso_search, DefaultAzureCredential()
)

delete_index(search_index_client, index_name)
index = create_index_definition(index_name)
print(f"creating index {index_name}")
search_index_client.create_or_update_index(index)
print(f"index {index_name} created")


# In[6]:


print(f"indexing documents")
docs = gen_contoso_products("../product_info/products.csv")
# Upload our data to the index.
search_client = SearchClient(
endpoint=contoso_search,
index_name=index_name,
credential=DefaultAzureCredential(),
)
print(f"uploading {len(docs)} documents to index {index_name}")
ds = search_client.upload_documents(docs)
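
# A quick sanity check (a sketch, not part of the original script): run a plain
# keyword query against the freshly populated index. The query string "tent" is
# an arbitrary example value.
results = search_client.search(search_text="tent")
for result in results:
    # Each result is a dict-like document keyed by the index fields defined above.
    print(result["id"], result["title"])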

72 changes: 72 additions & 0 deletions data/scripts/create-cosmos-db.py
@@ -0,0 +1,72 @@
#!/usr/bin/env python
# coding: utf-8

# In[1]:


from azure.cosmos import CosmosClient, exceptions, PartitionKey
from azure.identity import DefaultAzureCredential
import os
from dotenv import load_dotenv

load_dotenv()


# In[2]:


# from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

# try:
#     credential = DefaultAzureCredential()
#     # Check if the given credential can get a token successfully.
#     credential.get_token("https://management.azure.com/.default")
# except Exception as ex:
#     # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work.
#     # This will open a browser page for sign-in.
#     credential = InteractiveBrowserCredential()


# In[3]:


# Set the Cosmos DB endpoint in the .env file. The endpoint can be found on the resource created in the portal; this script authenticates with DefaultAzureCredential rather than an account key.
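# Example .env entry (placeholder value, not a real account):
#   COSMOS_ENDPOINT=https://<your-cosmos-account>.documents.azure.com:443/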
COSMOS_ENDPOINT = os.environ["COSMOS_ENDPOINT"]
client = CosmosClient(COSMOS_ENDPOINT, credential=DefaultAzureCredential())
DATABASE_NAME = 'contoso-outdoor'
CONTAINER_NAME = 'customers'


# In[4]:


# Get the database and container created by Bicep
database = client.get_database_client(DATABASE_NAME)
container = database.get_container_client(CONTAINER_NAME)

print(database)


# In[5]:


# Loop through each json file in data/customer_info and insert into container
import json
import glob

path = './../customer_info'
for filename in glob.glob(os.path.join(path, '*.json')):
    with open(filename) as file:
        customer = json.load(file)
        container.upsert_item(customer)
        print('Upserted item with id {0}'.format(customer['id']))


# In[6]:


# Get items from container to validate they were inserted
print('Get all items in container')
items = list(container.read_all_items(max_item_count=10))
print(items)
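
# A hedged extra check (a sketch, not in the original script): a cross-partition
# SQL query avoids needing the container's partition key value, which this diff
# does not show.
for item in container.query_items(
    query='SELECT c.id FROM c',
    enable_cross_partition_query=True,
):
    print(item['id'])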

