Skip to content

Commit

Permalink
build documentation to faiss_embedding
Browse files Browse the repository at this point in the history
  • Loading branch information
george1459 committed Apr 30, 2024
1 parent 0c2212a commit fee30fe
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
1 change: 1 addition & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
# ADJUST THIS: build your documentation into docs/.
# We use a custom build script for pdoc itself, ideally you just run `pdoc -o docs/ ...` here.
- run: pdoc -o docs_pdocs/ suql.sql_free_text_support.execute_free_text_sql --html
- run: pdoc -o docs_pdocs/ suql.faiss_embedding --html

- uses: actions/upload-pages-artifact@v3
with:
Expand Down
44 changes: 40 additions & 4 deletions src/suql/faiss_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def __init__(
self.psql_row_ids = []
self.all_free_text = []
self.embeddings = None

assert chunking_param >= 0
self.chunking_param = chunking_param
self.chunked_text = []

Expand Down Expand Up @@ -406,16 +408,50 @@ def __init__(self) -> None:

def add(
self,
table_name,
primary_key_field_name,
free_text_field_name,
db_name,
table_name: str,
primary_key_field_name: str,
free_text_field_name: str,
db_name: str,
user="select_user",
password="select_user",
chunking_param=0,
cache_embedding=True,
force_recompute=False
):
"""
Add a free text field to the SUQL embedding store to make it
available to the compiler.
# Parameters:
`table_name` (str): Table name of the free text field.
`primary_key_field_name` (str): Primary key field name of the table `table_name`.
`free_text_field_name` (str): Free text field name to be embedded.
`db_name` (str): PostgreSQL database name of the table `table_name`.
`user` (str, optional): User name with `SELECT` privilege on the table `table_name`.
Defaults to "select_user".
`password` (str, optional): Password for the above user with `SELECT` privilege
on the table `table_name`. Defaults to "select_user".
`chunking_param` (int, optional): Chunking parameter for embedding.
It denotes the number of tokens (as determined by `en_core_web_sm`) in each chunk
that a free text value is split into. Defaults to 0, which denotes no chunking.
`cache_embedding` (bool, optional): Whether to cache embeddings to disk. If cached,
this file computes a hash of the free text values. If the database values remain
unchanged, this file will directly use the cached embeddings. If there are changes
to the underlying values, this file will recompute the embeddings.
Defaults to True.
`force_recompute` (bool, optional): Whether to force recomputing embeddings.
If set to True, this file will re-compute the embedding even if a cache exists
for the underlying values. Defaults to False.
"""
if (
table_name in self.mapping
and free_text_field_name in self.mapping[table_name]
Expand Down

0 comments on commit fee30fe

Please sign in to comment.