From a225d63475bacc7888f6c63d69f1c9200e2c34a6 Mon Sep 17 00:00:00 2001
From: Adam Hendel <ChuckHend@users.noreply.github.com>
Date: Wed, 13 Nov 2024 16:31:28 -0600
Subject: [PATCH 1/4] separate job for ./core (#186)

* separate job for ./core
---
 .github/workflows/extension_ci.yml | 39 ++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/extension_ci.yml b/.github/workflows/extension_ci.yml
index 97100c7..e7af5f8 100644
--- a/.github/workflows/extension_ci.yml
+++ b/.github/workflows/extension_ci.yml
@@ -75,12 +75,9 @@ jobs:
       - name: Clippy
         run: cargo clippy
 
-  test:
-    name: Run tests
-    needs: dependencies
+  test-core:
     runs-on: ubuntu-24.04
     services:
-      # Label used to access the service container
       vector-serve:
         image: quay.io/tembo/vector-serve:latest
         ports:
@@ -99,10 +96,7 @@ jobs:
             /home/runner/.pgrx
       - name: Install sys dependencies
         run: |
-          sudo apt-get update && sudo apt-get install -y postgresql-server-dev-16 libopenblas-dev libreadline-dev
-      - uses: ./.github/actions/pgx-init
-        with:
-          working-directory: ./extension
+          sudo apt-get update && sudo apt-get install -y postgresql postgresql-contrib libopenblas-dev libreadline-dev
       - name: Test Core
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -112,6 +106,35 @@ jobs:
           VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
         run: |
           cd ../core && cargo test
+
+  test:
+    name: Run tests
+    needs: dependencies
+    runs-on: ubuntu-24.04
+    services:
+      # Label used to access the service container
+      vector-serve:
+        image: quay.io/tembo/vector-serve:latest
+        ports:
+          - 3000:3000
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust stable toolchain
+        uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+        with:
+          prefix-key: "extension-test"
+          workspaces: |
+            vectorize
+          # Additional directories to cache
+          cache-directories: |
+            /home/runner/.pgrx
+      - name: Install sys dependencies
+        run: |
+          sudo apt-get update && sudo apt-get install -y postgresql-server-dev-16 libopenblas-dev libreadline-dev
+      - uses: ./.github/actions/pgx-init
+        with:
+          working-directory: ./extension
       - name: Restore cached binaries
         uses: actions/cache@v2
         with:

From 425d9f3f55c166d19550a25ecb9743008036b244 Mon Sep 17 00:00:00 2001
From: Joshua Jerin <joshuaxjerin@gmail.com>
Date: Thu, 14 Nov 2024 13:29:17 -0500
Subject: [PATCH 2/4] Update Readme and Contributing.md File (#185)

* update chat-completion link

* remove embedding_svc_url change

* extension version fix

* Update CONTRIBUTING.md
---
 CONTRIBUTING.md | 103 +++++++++---------------------------------------
 README.md       |  20 +++++++---
 2 files changed, 33 insertions(+), 90 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c83bbba..11a22f5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,5 +1,7 @@
 # Contributing to pg_vectorize
 
+If you encounter any issues or have questions, feel free to join the [Tembo Community Slack](https://join.slack.com/t/tembocommunity/shared_invite/zt-2u3ctm86u-XzcyL76T7o~7Mpnt6KUx1g) for support.
+
 ## Prerequisites
 
 - [Rust](https://www.rust-lang.org/learn/get-started) - Toolchain including `rustc`, `cargo`, and `rustfmt`
@@ -32,52 +34,9 @@ Confirm a successful set up by running the following:
 docker ps
 ```
 
-:wrench: Note: Consider running the following to see the container logs real time:
-
-```bash
-docker logs <your-container-id> -f
-```
-
 ### 3. Clone and compile `pg_vectorize` and extension dependencies
 
-:wrench: When progressing through these steps, refer to the following for troubleshooting:
-
-```bash
-cat ~/.pgrx/15.log
-```
-
-#### 3.1. Apply configurations
-
-Prior to compiling and running `pg_vectorize`, it's essential to update the `postgresql.conf` file.
-`pgrx` uses a Postgres version-specific data directory, each containing its own `postgresql.conf` file.
-The following example, utilizes Postgres version 15.
-If you're using a different version, please alter the file path value `data-<postgres-version>` and run the following:
-
-```bash
-<your-editor> ~/.pgrx/data-15/postgresql.conf
-```
-
-Within this document, add the following:
-
-```text
-shared_preload_libraries = 'pg_cron, vectorize'
-cron.database_name = 'postgres'
-vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'
-```
-
-:wrench: Note: If your machine is running a MacOS, you may need to apply the following configurations to Cargo's config file:
-
-```
-<your-editor> ~/.cargo/config
-```
-
-```text
-[target.'cfg(target_os="macos")']
-# Postgres symbols won't be available until runtime
-rustflags = ["-Clink-arg=-Wl,-undefined,dynamic_lookup"]
-```
-
-#### 3.2. Clone and enter directory
+#### 3.1. Clone and enter directory
 
 ```bash
 git clone https://github.com/tembo-io/pg_vectorize.git
@@ -85,7 +44,7 @@ git clone https://github.com/tembo-io/pg_vectorize.git
 cd pg_vectorize/extension
 ```
 
-#### 3.3. Install dependencies
+#### 3.2. Install dependencies
 
 From within the pg_vectorize/extension directory, run the following, which will install `pg_cron`, `pgmq`, and `pgvector`:
 
@@ -93,7 +52,7 @@ From within the pg_vectorize/extension directory, run the following, which will
 make setup
 ```
 
-#### 3.4. Compile and run `pg_vectorize`
+#### 3.3. Compile and run `pg_vectorize`
 
 ```bash
 make run
@@ -124,61 +83,29 @@ To list out the enabled extensions, run:
  pgmq       | 1.1.1   | pgmq       | A lightweight message queue. Like AWS SQS and RSMQ but on Postgres.
  plpgsql    | 1.0     | pg_catalog | PL/pgSQL procedural language
  vector     | 0.6.0   | public     | vector data type and ivfflat and hnsw access methods
- vectorize  | 0.10.1  | vectorize  | The simplest way to do vector search on Postgres
+ vectorize  | 0.19.0  | vectorize  | The simplest way to do vector search on Postgres
 (6 rows)
 ```
 
 #### 4.2 Confirm embedding service url is set to localhost
 
-In section 3.1., we set the following postgresql.conf variable:
-
-```text
-vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'
-```
-
-To confirm its success, run the following SHOW command:
-
-```sql
-SHOW vectorize.embedding_service_url;
-```
-```text
-   vectorize.embedding_service_url
--------------------------------------
- http://localhost:3000/v1/embeddings
-(1 row)
-```
-
-Say, for example, instead of local host, `vector-serve:3000` was the target?
-Should you desire to change this from within Postgre, simply run:
-
-```
-ALTER SYSTEM SET vectorize.embedding_service_url TO 'http://localhost:3000/v1/embeddings';
-```
-
-Making changes such as this requires the following to be run:
-
-```sql
-SELECT pg_reload_conf();
-```
-
-Running the earlier SHOW command should reveal the appropriate change:
+Run the following SHOW command to confirm that the url is set to `localhost`:
 
 ```sql
 SHOW vectorize.embedding_service_url;
 ```
-
 ```text
    vectorize.embedding_service_url
 -------------------------------------
- http://localhost:3000/v1/embeddings
+ http://localhost:3000/v1
 (1 row)
 ```
 
 #### 4.3. Load example data
 
-The following can be found within the this project's README, under [Hugging Face Example](https://github.com/tembo-io/pg_vectorize/blob/main/README.md#hugging-face-example).
+The following can be found within this project's README, under [Vector Search Example](https://github.com/tembo-io/pg_vectorize/blob/main/README.md#vector-search-example).
 
-Begin by creating a `producs` table with the dataset that comes included with `pg_vectorize`.
+Begin by creating a `products` table with the dataset that comes included with `pg_vectorize`.
 
 ```sql
 CREATE TABLE products (LIKE vectorize.example_products INCLUDING ALL);
@@ -236,9 +163,17 @@ num_results => 3
 
 ### 5. Local URL
 
-Once all of the following is complete, you should be able to visit the `Tembo-Embedding-Service` at [http://localhost:3000/docs](http://localhost:3000/docs) and explore.
+Once all of the above steps are complete, you should be able to access the Swagger UI for `Tembo-Embedding-Service` at [http://localhost:3000/docs](http://localhost:3000/docs) and explore.
 This is a platform that allows, for example, the input of [different sentence-transformers models](https://huggingface.co/models?sort=trending&search=sentence-transformers) from Hugging Face.
 
+## Troubleshooting
+
+To check `pgrx` logs for debugging:
+
+```bash
+cat ~/.pgrx/17.log
+```
+
 # Releases
 
 `pg_vectorize` releases are automated through a [Github workflow](https://github.com/tembo-io/pg_vectorize/blob/main/.github/workflows/extension_ci.yml).
diff --git a/README.md b/README.md
index 26f3d30..d40fb6f 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ pg_vectorize powers the [VectorDB Stack](https://tembo.io/docs/product/stacks/ai
 ## Features
 
 - Workflows for both vector search and RAG
-- Integrations with OpenAI's [embeddings](https://platform.openai.com/docs/guides/embeddings) and [chat-completion](https://platform.openai.com/docs/guides/text-generation) endpoints and a self-hosted container for running [Hugging Face Sentence-Transformers](https://huggingface.co/sentence-transformers)
+- Integrations with OpenAI's [embeddings](https://platform.openai.com/docs/guides/embeddings) and [Text-Generation](https://platform.openai.com/docs/guides/text-generation) endpoints and a self-hosted container for running [Hugging Face Sentence-Transformers](https://huggingface.co/sentence-transformers)
 - Automated creation of Postgres triggers to keep your embeddings up to date
 - High level API - one function to initialize embeddings transformations, and another function to search
 
@@ -88,14 +88,14 @@ Then set the following either in postgresql.conf or as a configuration parameter
 ```sql
 -- requires restart of Postgres
 alter system set shared_preload_libraries = 'vectorize,pg_cron';
-alter system set cron.database_name = 'postgres'
+alter system set cron.database_name = 'postgres';
 ```
 
 And if you're running the vector-serve container, set the following url as a configuration parameter in Postgres.
  The host may need to change from `localhost` to something else depending on where you are running the container.
 
 ```sql
-alter system set vectorize.embedding_service_url = 'http://localhost:3000/v1/embeddings'
+alter system set vectorize.embedding_service_url = 'http://localhost:3000/v1';
 
 SELECT pg_reload_conf();
 ```
@@ -104,7 +104,7 @@ SELECT pg_reload_conf();
 
 ## Vector Search Example
 
-Text-to-embedding transformation can be done with either Hugging Face's Sentence-Transformers or OpenAI's embeddings. The following examples use Hugging Face's Sentence-Transformers. See the project [documentation](https://tembo-io.github.io/pg_vectorize/) for OpenAI examples.
+Text-to-embedding transformation can be done with either Hugging Face's Sentence-Transformers or OpenAI's embeddings. The following examples use Hugging Face's Sentence-Transformers. See the project [documentation](https://tembo.io/pg_vectorize/examples/openai_embeddings/) for OpenAI examples.
 
 Follow the [installation](#installation) steps if you haven't already.
 
@@ -190,7 +190,7 @@ ADD COLUMN context TEXT GENERATED ALWAYS AS (product_name || ': ' || description
 ```
 
 Initialize the RAG project.
- We'll use the `sentence-transformers/all-MiniLM-L6-v2` model to generate embeddings on our source documents.
+ We'll use the `openai/text-embedding-3-small` model to generate embeddings on our source documents.
 
 ```sql
 SELECT vectorize.init_rag(
@@ -198,7 +198,7 @@ SELECT vectorize.init_rag(
     table_name          => 'products',
     "column"            => 'context',
     unique_record_id    => 'product_id',
-    transformer         => 'sentence-transformers/all-MiniLM-L6-v2'
+    transformer         => 'openai/text-embedding-3-small'
 );
 ```
 
@@ -286,3 +286,11 @@ select vectorize.encode(
 {0.0028769304,-0.005826319,-0.0035932811, ...}
 ```
 
+## Contributing
+
+We welcome contributions from the community! If you're interested in contributing to `pg_vectorize`, please check out our [Contributing Guide](CONTRIBUTING.md). Your contributions help make this project better for everyone.
+
+## Community Support
+
+If you encounter any issues or have any questions, feel free to join our [Tembo Community Slack](https://join.slack.com/t/tembocommunity/shared_invite/zt-2u3ctm86u-XzcyL76T7o~7Mpnt6KUx1g). We're here to help!
+

From 4b76996f2a5622005743736fabbcf3b763d708e8 Mon Sep 17 00:00:00 2001
From: Adam Hendel <ChuckHend@users.noreply.github.com>
Date: Mon, 25 Nov 2024 15:27:41 -0600
Subject: [PATCH 3/4] force build and publish pg17 to trunk (#189)

* force build and publish pg17 to trunk

* update CODEOWNERS
---
 .github/CODEOWNERS                 | 2 +-
 .github/workflows/extension_ci.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 5934fb6..0f9e029 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1 +1 @@
-*       @ChuckHend @shhnwz @jasonmp85
+*       @ChuckHend
diff --git a/.github/workflows/extension_ci.yml b/.github/workflows/extension_ci.yml
index e7af5f8..166a761 100644
--- a/.github/workflows/extension_ci.yml
+++ b/.github/workflows/extension_ci.yml
@@ -167,7 +167,7 @@ jobs:
     runs-on: ubuntu-24.04
     strategy:
       matrix:
-        pg-version: [14, 15, 16]
+        pg-version: [14, 15, 16, 17]
     steps:
       - uses: actions/checkout@v2
       - name: Install Rust stable toolchain

From 4a365d3aaafa3965a3eae5f11a78d039e6ea54f8 Mon Sep 17 00:00:00 2001
From: Akhilender Bongirwar
 <112749383+akhilender-bongirwar@users.noreply.github.com>
Date: Fri, 13 Dec 2024 07:14:00 +0530
Subject: [PATCH 4/4] feat: Add event trigger to handle job cleanup on table
 drop in vectorize schema (#178)

* feat: Add event trigger to handle job cleanup on table drop in vectorize schema

Signed-off-by: Akhilender <akhilenderb9@gmail.com>

* fix: made requested changes-1

* fix: added corresponding integration test

* fix: minor-fix in meta.sql file

* fix: integration_test test_event_trigger_on_table_drop

* fix: remove debugging notices

---------

Signed-off-by: Akhilender <akhilenderb9@gmail.com>
---
 extension/sql/meta.sql               | 27 ++++++++++++
 extension/tests/integration_tests.rs | 63 ++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/extension/sql/meta.sql b/extension/sql/meta.sql
index 0570668..322a67f 100644
--- a/extension/sql/meta.sql
+++ b/extension/sql/meta.sql
@@ -20,6 +20,33 @@ GRANT SELECT ON ALL SEQUENCES IN SCHEMA vectorize TO pg_monitor;
 ALTER DEFAULT PRIVILEGES IN SCHEMA vectorize GRANT SELECT ON TABLES TO pg_monitor;
 ALTER DEFAULT PRIVILEGES IN SCHEMA vectorize GRANT SELECT ON SEQUENCES TO pg_monitor;
 
+-- Event-trigger function fired on sql_drop: for every dropped table, delete
+-- the vectorize.job rows whose params reference that table, so that no
+-- orphaned jobs are left behind.
+CREATE OR REPLACE FUNCTION handle_table_drop()
+RETURNS event_trigger AS $$
+DECLARE
+    obj RECORD;
+BEGIN
+    FOR obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP
+        IF obj.object_type = 'table' THEN
+            -- Match on the unquoted schema_name/object_name columns instead of
+            -- splitting object_identity on '.', which breaks for identifiers
+            -- that are quoted or that contain dots themselves.
+            DELETE FROM vectorize.job
+            WHERE params ->> 'table' = obj.object_name
+            AND params ->> 'schema' = obj.schema_name;
+        END IF;
+    END LOOP;
+END;
+$$ LANGUAGE plpgsql;
+
+-- CREATE EVENT TRIGGER has no OR REPLACE form, so drop any existing trigger first.
+DROP EVENT TRIGGER IF EXISTS vectorize_job_drop_trigger;
+CREATE EVENT TRIGGER vectorize_job_drop_trigger
+ON sql_drop
+WHEN TAG IN ('DROP TABLE')
+EXECUTE FUNCTION handle_table_drop();
 
 INSERT INTO vectorize.prompts (prompt_type, sys_prompt, user_prompt)
 VALUES (
diff --git a/extension/tests/integration_tests.rs b/extension/tests/integration_tests.rs
index 6361682..8828b59 100644
--- a/extension/tests/integration_tests.rs
+++ b/extension/tests/integration_tests.rs
@@ -860,3 +860,66 @@ async fn test_cohere() {
             .unwrap();
     assert_eq!(search_results.len(), 3);
 }
+
+#[ignore]
+#[tokio::test]
+async fn test_event_trigger_on_table_drop() {
+    let conn = common::init_database().await;
+    let mut rng = rand::thread_rng();
+    let test_num = rng.gen_range(1..100000);
+    let test_table_name = format!("products_test_{}", test_num);
+    let job_name = format!("job_{}", test_num);
+
+    // Set up the test table and embedding service URL, then register a vectorize job on that table
+    common::init_test_table(&test_table_name, &conn).await;
+    common::init_embedding_svc_url(&conn).await;
+
+    let _ = sqlx::query(&format!(
+        "SELECT vectorize.table(
+        job_name => '{job_name}',
+        \"table\" => '{test_table_name}',
+        primary_key => 'product_id',
+        columns => ARRAY['product_name'],
+        transformer => 'sentence-transformers/all-MiniLM-L6-v2'
+    );"
+    ))
+    .execute(&conn)
+    .await
+    .expect("failed to initialize vectorize job");
+
+    // Precondition: exactly one job row exists (assumes vectorize.job was empty before this test ran)
+    let job_count_before = common::row_count("vectorize.job", &conn).await;
+    assert_eq!(job_count_before, 1);
+
+    // Drop the test table; the drop itself must succeed before the cleanup can be checked
+    let drop_result = sqlx::query(&format!("DROP TABLE {test_table_name} CASCADE;"))
+        .execute(&conn)
+        .await;
+    assert!(drop_result.is_ok(), "Failed to drop the test table");
+
+    // The job row for the dropped table should be gone, leaving vectorize.job empty
+    let job_count_after = common::row_count("vectorize.job", &conn).await;
+    assert_eq!(job_count_after, 0, "Job entry was not removed after table drop");
+
+    // Double-check by looking the job up through the table and schema stored in its params
+    let deleted_job = sqlx::query("SELECT * FROM vectorize.job WHERE params->>'table' = $1 AND params->>'schema' = $2")
+        .bind(test_table_name)
+        .bind("public")
+        .fetch_optional(&conn)
+        .await
+        .expect("Failed to fetch job");
+
+    assert!(deleted_job.is_none(), "Job was not deleted after table drop");
+
+    // Drop a table that has no associated job and verify the drop still succeeds
+    let unrelated_table_name = format!("unrelated_test_{}", test_num);
+    common::init_test_table(&unrelated_table_name, &conn).await;
+    let _ = sqlx::query(&format!("DROP TABLE {unrelated_table_name};"))
+        .execute(&conn)
+        .await
+        .expect("Failed to drop the unrelated test table");
+
+    // vectorize.job should still be empty after the unrelated drop
+    let final_job_count = common::row_count("vectorize.job", &conn).await;
+    assert_eq!(final_job_count, 0, "vectorize.job should remain unaffected by unrelated table drops");
+}